home *** CD-ROM | disk | FTP | other *** search
- # libwhisker v1.7
- # libwhisker is a collection of routines used by whisker
-
- #
-
- # libwhisker copyright 2000,2001,2002 rfp.labs
-
- #
-
- # This program is free software; you can redistribute it and/or
-
- # modify it under the terms of the GNU General Public License
-
- # as published by the Free Software Foundation; either version 2
-
- # of the License, or (at your option) any later version.
-
- #
-
- # This program is distributed in the hope that it will be useful,
-
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
-
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-
- # GNU General Public License for more details.
-
- #
-
- #
-
- # More information can be found at http://www.wiretrip.net/rfp/
-
- # Libwhisker mailing list and resources are also available at
-
- # http://sourceforge.net/projects/whisker/
-
- #
-
-
-
- package LW;
-
- use 5.004;
-
- $LW::VERSION="1.7";
-
-
- ####### external module tests ###################################
-
-
-
- BEGIN {
-
-
-
- ## LW module manager stuff ##
-
-
-
- %LW::available = ();
-
- $LW::LW_HAS_SOCKET = 0;
-
- $LW::LW_HAS_SSL = 0;
-
- $LW::LW_SSL_LIB = 0;
-
- $LW::LW_NONBLOCK_CONNECT= 0;
-
-
-
- ## binary helper - may contain functions substituted further down ##
-
- eval "use LW::bin"; # do we have libwhisker binary helpers?
-
- if($@){ $LW::available{'LW::bin'}=$LW::bin::VERSION; }
-
-
-
- ## encode subpackage ##
-
- eval "require MIME::Base64";
-
- if($@){
-
- *encode_base64 = \&encode_base64_perl;
-
- *decode_base64 = \&decode_base64_perl;
-
- } else{
-
- # MIME::Base64 typically has faster C versions
-
- $LW::available{'mime::base64'}=$MIME::Base64::VERSION;
-
- *encode_base64 = \&MIME::Base64::encode_base64;
-
- *decode_base64 = \&MIME::Base64::decode_base64;}
-
-
-
- ## md5 subpackage ##
-
- eval "require MD5";
-
- if(!$@){ $LW::available{'md5'}=$MD5::VERSION;}
-
-
-
- ## http subpackage ##
-
- eval "use Socket"; # do we have socket support?
-
- if($@){ $LW::LW_HAS_SOCKET=0; }
-
- else { $LW::LW_HAS_SOCKET=1;
-
- $LW::available{'socket'}=$Socket::VERSION;}
-
-
-
- if($LW_HAS_SOCKET){
-
- eval "use Net::SSLeay"; # do we have SSL support?
-
- if($@){ $LW::LW_HAS_SSL=0; }
-
- else { $LW::LW_HAS_SSL=1;
-
- $LW::LW_SSL_LIB=1;
-
- $LW::available{'net::ssleay'}=$Net::SSLeay::VERSION;
-
- Net::SSLeay::load_error_strings();
-
- Net::SSLeay::SSLeay_add_ssl_algorithms();
-
- Net::SSLeay::randomize();}
-
- if(!$LW::LW_HAS_SSL){
-
- eval "use Net::SSL"; # different SSL lib
-
- if($@){ $LW::LW_HAS_SSL=0; }
-
- else { $LW::LW_HAS_SSL=1;
-
- $LW::LW_SSL_LIB=2;
-
- $LW::available{'net::ssl'}=$Net::SSL::VERSION;}
-
- }
-
-
-
- ## non-blocking IO ##
-
-
-
- if($^O!~/Win32/){
-
- eval "use POSIX qw(:errno_h :fcntl_h)"; # better
-
- if(!$@){
-
- $LW::LW_NONBLOCK_CONNECT=1;
-
- }
-
- }
-
-
-
- } # if($LW_HAS_SOCKET)
-
-
-
- } # BEGIN
-
-
-
- ####### package variables #######################################
-
-
-
- ## crawl subpackage ##
-
- %LW::crawl_config=( 'save_cookies' => 0,
-
- 'reuse_cookies' => 1,
-
- 'save_offsites' => 0,
-
- 'follow_moves' => 1,
-
- 'url_limit' => 1000,
-
- 'use_params' => 0,
-
- 'params_double_record' => 0,
-
- 'skip_ext' => '.gif .jpg .gz .mp3 .swf .zip ',
-
- 'save_skipped' => 0,
-
- 'save_referrers'=> 0,
-
- 'do_head' => 0,
-
- 'callback' => 0,
-
- 'slashdot_bug' => 1,
-
- 'normalize_uri' => 1,
-
- 'source_callback' => 0
-
- );
-
-
-
-
-
- @LW::crawl_urls=();;
-
- %LW::crawl_server_tags=();
-
- %LW::crawl_referrers=();
-
- %LW::crawl_offsites=();
-
- %LW::crawl_cookies=();
-
- %LW::crawl_forms=();
-
- %LW::crawl_temp=();
-
-
-
- # this idea/structure was taken from HTML::LinkExtor.pm,
-
- # copyright 2000 Gisle Aas and Michael A. Chase
-
- %LW::crawl_linktags = (
-
- 'a' => 'href',
-
- 'applet' => [qw(codebase archive code)],
-
- 'area' => 'href',
-
- 'base' => 'href',
-
- 'bgsound' => 'src',
-
- 'blockquote' => 'cite',
-
- 'body' => 'background',
-
- 'del' => 'cite',
-
- 'embed' => [qw(src pluginspage)],
-
- 'form' => 'action',
-
- 'frame' => [qw(src longdesc)],
-
- 'iframe' => [qw(src longdesc)],
-
- 'ilayer' => 'background',
-
- 'img' => [qw(src lowsrc longdesc usemap)],
-
- 'input' => [qw(src usemap)],
-
- 'ins' => 'cite',
-
- 'isindex' => 'action',
-
- 'head' => 'profile',
-
- 'layer' => [qw(background src)],
-
- 'link' => 'href',
-
- 'object' => [qw(codebase data archive usemap)],
-
- 'q' => 'cite',
-
- 'script' => 'src',
-
- 'table' => 'background',
-
- 'td' => 'background',
-
- 'th' => 'background',
-
- 'xmp' => 'href',
-
- );
-
-
-
-
-
- ## forms subpackage ##
-
- @LW::forms_found=();
-
- %LW::forms_current=();
-
-
-
-
-
- ## http subpackage ##
-
- my $SOCKSTATE=0;
-
- my $TIMEOUT=10; # default
-
- my ($STATS_REQS,$STATS_SYNS)=(0,0);
-
- my ($LAST_HOST,$LAST_INET_ATON,$LAST_SSL)=('','',0);
-
- my ($OUTGOING_QUEUE,$INCOMING_QUEUE)=('','');
-
- my ($SSL_CTX, $SSL_THINGY);
-
-
-
- my %http_host_cache=();
-
- # order is following:
-
- # [0] - SOCKET
-
- # [1] - $SOCKSTATE
-
- # [2] - INET_ATON
-
- # [3] - $SSL_CTX
-
- # [4] - $SSL_THINGY
-
- # [5] - $OUTGOING_QUEUE
-
- # [6] - $INCOMING_QUEUE
-
- # [7] - $STATS_SYNS
-
- # [8] - $STATS_REQS
-
-
-
- my $Z; # array ref to current host specs
-
-
-
- =pod
-
-
-
-
-
- =head1 ++ Sub package: anti-ids
-
-
-
- The anti-ids sub package implements management routines for various
-
- rewriting/encoding in order to evade intrusion detection systems.
-
-
-
- =cut
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::anti_ids
-
-
-
- Params: \%hin, $modes
-
- Return: nothing
-
-
-
- LW::anti_ids computes the proper anti-ids encoding/tricks specified by
-
- $modes, and sets up %hin in order to use those tricks. Valid modes
-
- are (the mode numbers are the same as those found in whisker 1.4):
-
-
-
- 1 - Encode some of the characters via normal URL encoding
-
- 2 - Insert directory self-references (/./)
-
- 3 - Premature URL ending (make it appear the request line is done)
-
- 4 - Prepend a long random string in the form of "/string/../URL"
-
- 5 - Add a fake URL parameter
-
- 6 - Use a tab instead of a space as a request spacer
-
- 7 - Change the case of the URL around (works against Windows and Novell)
-
- 8 - Change normal seperators ('/') to Windows version ('\')
-
- 9 - Session splicing (sending data in multiple packets)
-
-
-
- You can set multiple modes by setting the string to contain all the modes
-
- desired; i.e. $modes="146" will use modes 1, 4, and 6.
-
-
-
- =cut
-
-
-
-
-
- sub anti_ids {
-
- my ($rhin,$modes)=(shift,shift);
-
- my (@T,$x,$c,$s,$y);
-
- my $ENCODED=0;
-
- my $W = $$rhin{'whisker'};
-
-
-
- return if(!(defined $rhin && ref($rhin)));
-
-
-
- # in case they didn't do it already
-
- $$rhin{'whisker'}->{'uri_orig'}=$$rhin{'whisker'}->{'uri'};
-
-
-
- # note: order is important!
-
-
-
- # mode 9 - session splicing
-
- if($modes=~/9/){
-
- $$rhin{'whisker'}->{'ids_session_splice'}=1;
-
- }
-
-
-
- # mode 4 - prepend long random string
-
- if($modes=~/4/){$s='';
-
- if($$W{'uri'}=~m#^/#){
-
- $y=&utils_randstr;
-
- $s.=$y while(length($s)<512);
-
- $$W{'uri'}="/$s/..".$$W{'uri'};
-
- }
-
- }
-
-
-
- # mode 7 - (windows) random case sensitivity
-
- if($modes=~/7/){
-
- @T=split(//,$$W{'uri'});
-
- for($x=0;$x<(scalar @T);$x++){
-
- if((rand()*2)%2 == 1){
-
- $T[$x]=uc($T[$x]);}}
-
- $$W{'uri'}=join('',@T);
-
- }
-
-
-
- # mode 2 - directory self-reference (/./)
-
- if($modes=~/2/){
-
- $$W{'uri'}=~s#/#/./#g;
-
- }
-
-
-
-
-
- # mode 8 - windows directory separator (\)
-
- if($modes=~/8/){
-
- $$W{'uri'}=~s#/#\\#g;
-
- $$W{'uri'}=~s#^\\#/#;
-
- $$W{'uri'}=~s#^(http|file|ftp|nntp|news|telnet):\\#$1://#;
-
- $$W{'uri'}=~s#\\$#/#;
-
- }
-
-
-
- # mode 1 - random URI (non-UTF8) encoding
-
- if($modes=~/1/){
-
- if($ENCODED==0){
-
- $$W{'uri'}=encode_str2ruri($$W{'uri'});
-
- $ENCODED=1;}
-
- }
-
-
-
-
-
- # mode 5 - fake parameter
-
- if($modes=~/5/){
-
- ($s,$y)=(&utils_randstr,&utils_randstr);
-
- $$W{'uri'}="/$s.html%3f$y=/../$$W{'uri'}";
-
- }
-
-
-
- # mode 3 - premature URL ending
-
- if($modes=~/3/){
-
- $s=&utils_randstr;
-
- $$W{'uri'}="/%20HTTP/1.1%0D%0A%0D%0AAccept%3A%20$s/../..$$W{'uri'}";
-
- }
-
-
-
- # mode 6 - TAB as request spacer
-
- if($modes=~/6/){
-
- $$W{'req_spacer'}="\t";
-
- }
-
-
-
- } # end anti_ids
-
-
-
-
-
-
-
-
-
- =pod
-
-
-
-
-
- =head1 ++ Sub package: auth
-
-
-
- The auth sub package implements HTTP authentication routines.
-
-
-
- =cut
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::auth_brute_force
-
-
-
- Params: $auth_method, \%hin, $user, \@passwords [, $domain]
-
- Return: $first_valid_password, undef if error/none found
-
-
-
- Perform a HTTP authentication brute force against a server (host and URI
-
- defined in %hin). It will try every password in the password array for
-
- the given user. The first password (in conjunction with the given user)
-
- that doesn't return HTTP 401 is returned (and the brute force is stopped
-
- at that point). $domain is optional, and is only used for NTLM auth.
-
-
-
- =cut
-
-
-
- sub auth_brute_force {
-
- my ($auth_method, $hrin, $user, $pwordref, $dom)=@_;
-
- my ($P,%hout);
-
-
-
- return undef if(!defined $auth_method || length($auth_method)==0);
-
- return undef if(!defined $user || length($user) ==0);
-
- return undef if(!(defined $hrin && ref($hrin) ));
-
- return undef if(!(defined $pwordref && ref($pwordref)));
-
-
-
- map {
-
- ($P=$_)=~tr/\r\n//d;
-
- auth_set_header($auth_method,$hrin,$user,$P,$dom);
-
- return undef if(http_do_request($hrin,\%hout));
-
- return $P if($hout{'whisker'}->{'http_resp'} ne 401);
-
- } @$pwordref;
-
-
-
- return undef;}
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::auth_set_header
-
-
-
- Params: $auth_method, \%hin, $user, $password [, $domain]
-
- Return: nothing (modifies %hin)
-
-
-
- Set the appropriate authentication header in %hin.
-
-
-
- NOTE: right now only BASIC and NTLM are supported.
-
-
-
- =cut
-
-
-
- sub auth_set_header {
-
- my ($method, $href, $user, $pass, $domain)=(lc(shift),@_);
-
-
-
- return if(!(defined $href && ref($href)));
-
- return if(!defined $user || !defined $pass);
-
-
-
- if($method eq 'basic'){
-
- $$href{'Authorization'}='Basic '.encode_base64($user.':'.$pass,'');
-
- }
-
-
-
- if($method eq 'proxy-basic'){
-
- $$href{'Proxy-Authorization'}='Basic '.encode_base64($user.':'.$pass,'');
-
- }
-
-
-
- if($method eq 'ntlm'){
-
- my $o=ntlm_new($user,$pass,$domain);
-
- $$href{'whisker'}->{'ntlm_obj'}=$o;
-
- $$href{'whisker'}->{'ntlm_step'}=0;
-
- $$href{'Authorization'}='NTLM '.ntlm_client($o);
-
- }
-
-
-
- }
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::do_auth
-
-
-
- Params: $auth_method, \%hin, $user, $password [, $domain]
-
- Return: nothing (modifies %hin)
-
-
-
- This is an alias for auth_set_header().
-
-
-
- =cut
-
-
-
- sub do_auth {
-
- goto &auth_set_header;
-
- }
-
-
-
- =pod
-
-
-
- =head1 ++ Sub package: bruteurl
-
-
-
- The bruteurl sub package is used to perform a brute-force of HTTP
-
- requests on an array of string components.
-
-
-
- =cut
-
-
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::bruteurl
-
-
-
- Params: \%hin, $pre, $post, \@values_in, \@values_out
-
- Return: Nothing (adds to @out)
-
-
-
- Bruteurl will perform a brute force against the host/server specified in
-
- %hin. However, it will make one request per entry in @in, taking the
-
- value and setting $hin{'whisker'}->{'uri'}= $pre.value.$post. Any URI
-
- responding with an HTTP 200 or 403 response is pushed into @out. An
-
- example of this would be to brute force usernames, putting a list of
-
- common usernames in @in, setting $pre='/~' and $post='/'.
-
-
-
- =cut
-
- sub bruteurl {
-
- my ($hin, $upre, $upost, $arin, $arout)=@_;
-
- my ($U,%hout);
-
-
-
- return if(!(defined $hin && ref($hin) ));
-
- return if(!(defined $arin && ref($arin) ));
-
- return if(!(defined $arout && ref($arout)));
-
- return if(!defined $upre || length($upre) ==0);
-
- return if(!defined $upost || length($upost)==0);
-
-
-
- http_fixup_request($hin);
-
-
-
- map {
-
- ($U=$_)=~tr/\r\n//d; next if($U eq '');
-
- if(!http_do_request($hin,\%hout,{'uri'=>$upre.$U.$upost})){
-
- if( $hout{'whisker'}->{'http_resp'}==200 ||
-
- $hout{'whisker'}->{'http_resp'}==403){
-
- push(@{$arout},$U);
-
- }
-
- }
-
- } @$arin;
-
- }
-
-
-
-
-
- =pod
-
-
-
- =head1 ++ Sub package: cookie
-
-
-
- Cookie handling functions.
-
-
-
- Cookies are stored in a "jar" (hash), indexed by cookie name. The
-
- contents are an anonymous array:
-
-
-
- $jar{'name'}=@( 'value', 'domain', 'path', 'expire', 'secure' )
-
-
-
- =cut
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::cookie_read
-
-
-
- Params: \%jar, \%hout
-
- Return: $num_of_cookies_read
-
-
-
- Read in cookies from an %hout hash (HTTP response), and put them in %jar.
-
-
-
- =cut
-
-
-
- sub cookie_read {
-
- my ($count,$jarref,$href)=(0,@_);
-
-
-
- return 0 if(!(defined $jarref && ref($jarref)));
-
- return 0 if(!(defined $href && ref($href) ));
-
-
-
- my $target = utils_find_lowercase_key($href,'set-cookie');
-
-
-
- if(!defined $target){
-
- return 0;}
-
-
-
- if(ref($target)){ # multiple headers
-
- foreach (@{$target}){
-
- cookie_parse($jarref,$_);
-
- $count++; }
-
- } else { # single header
-
- cookie_parse($jarref,$target);
-
- $count=1; }
-
-
-
- return $count;
-
- }
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::cookie_parse
-
-
-
- Params: \%jar, $cookie
-
- Return: nothing
-
-
-
- Parses the cookie into the various parts and then sets the appropriate
-
- values in the %jar under the name; if the cookie is blank, it will delete
-
- it from the jar.
-
-
-
- =cut
-
-
-
- sub cookie_parse {
-
- my ($jarref, $header)=@_;
-
- my ($del,$part,@parts,@construct,$cookie_name)=(0);
-
-
-
- return if(!(defined $jarref && ref($jarref)));
-
- return if(!(defined $header && length($header)>0));
-
-
-
- @parts=split(/;/,$header);
-
-
-
- foreach $part (@parts){
-
- if($part=~/^[ \t]*(.+?)=(.*)$/){
-
- my ($name,$val)=($1,$2);
-
- if($name=~/^domain$/i){
-
- $val=~s#^http://##;
-
- $val=~s#/.*$##;
-
- $construct[1]=$val;
-
- } elsif($name=~/^path$/i){
-
- $val=~s#/$## if($val ne '/');
-
- $construct[2]=$val;
-
- } elsif($name=~/^expires$/i){
-
- $construct[3]=$val;
-
- } else {
-
- $cookie_name=$name;
-
- if($val eq ''){ $del=1;
-
- } else { $construct[0]=$val;}
-
- }
-
- } else {
-
- if($part=~/secure/){
-
- $construct[4]=1;}
-
- } }
-
-
-
- if($del){
-
- delete $$jarref{$cookie_name} if defined $$jarref{$cookie_name};
-
- } else {
-
- $$jarref{$cookie_name}=\@construct;
-
- }
-
- }
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::cookie_write
-
-
-
- Params: \%jar, \%hin, $override
-
- Return: nothing
-
-
-
- Goes through the given jar and sets the Cookie header in %hin pending the
-
- correct domain and path. If $override is true, then the domain and path
-
- restrictions of the cookies are ignored.
-
-
-
- Todo: factor in expire and secure.
-
-
-
- =cut
-
-
-
- sub cookie_write {
-
- my ($jarref, $hin, $override)=@_;
-
- my ($name,$out)=('','');
-
-
-
- return if(!(defined $jarref && ref($jarref)));
-
- return if(!(defined $hin && ref($hin) ));
-
-
-
- $override=$override||0;
-
- $$hin{'whisker'}->{'ssl'}=$$hin{'whisker'}->{'ssl'}||0;
-
-
-
- foreach $name (keys %$jarref){
-
- next if($name eq '');
-
- next if($$hin{'whisker'}->{'ssl'}==0 && $$jarref{$name}->[4]>0);
-
- if($override ||
-
- ($$hin{'whisker'}->{'host'}=~/$$jarref{$name}->[1]$/i &&
-
- $$hin{'whisker'}->{'uri'}=~/$$jarref{$name}->[2]/i)){
-
- $out.="$name=$$jarref{$name}->[0];";
-
- } }
-
-
-
- if($out ne ''){ $$hin{'Cookie'}=$out; }
-
-
-
- }
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::cookie_get
-
-
-
- Params: \%jar, $name
-
- Return: @elements
-
-
-
- Fetch the named cookie from the jar, and return the components.
-
-
-
- =cut
-
-
-
- sub cookie_get {
-
- my ($jarref,$name)=@_;
-
-
-
- return undef if(!(defined $jarref && ref($jarref)));
-
-
-
- if(defined $$jarref{$name}){
-
- return @{$$jarref{$name}};}
-
-
-
- return undef;
-
- }
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::cookie_set
-
-
-
- Params: \%jar, $name, $value, $domain, $path, $expire, $secure
-
- Return: nothing
-
-
-
- Set the named cookie with the provided values into the %jar.
-
-
-
- =cut
-
-
-
- sub cookie_set {
-
- my ($jarref,$name,$value,$domain,$path,$expire,$secure)=@_;
-
- my @construct;
-
-
-
- return if(!(defined $jarref && ref($jarref)));
-
-
-
- return if($name eq '');
-
- if($value eq ''){
-
- delete $$jarref{$name};
-
- return;}
-
- $path=$path||'/';
-
- $secure=$secure||0;
-
-
-
- @construct=($value,$domain,$path,$expire,$secure);
-
- $$jarref{$name}=\@construct;
-
- }
-
-
-
-
-
- ########################################################################
-
-
-
-
-
- =pod
-
-
-
- =head1 ++ Sub package: crawl
-
-
-
- Used for crawling a website by requesting a (start) page, reading the
-
- HTML, extracting the links, and then requesting those links--up to a
-
- specified depth. The module also allows various configuration tweaks to
-
- do such things as monitor requests for offsite URLs (pages on other
-
- hosts), track various cookies, etc.
-
-
-
- =cut
-
-
-
- #####################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::crawl
-
-
-
- Params: $START, $MAX_DEPTH, \%tracking, \%hin
-
- Return: Nothing
-
-
-
- The heart of the crawl package. Will perform an HTTP crawl on the
-
- specified HOST, starting at START URI, proceeding up to MAX_DEPTH. A
-
- tracking hash reference (required) stores the results of each page (and
-
- ongoing progress). The http_in_options hash reference specifies a
-
- standard HTTP hash for use in the outgoing HTTP requests. Certain options
-
- are configurable via LW::crawl_set_config(). The tracking hash will
-
- contain all the pages visited; you can get the crawl engine to skip pages
-
- by placing them in the tracking hash ahead of time.
-
-
-
- START (first) parameter should be of the form "http://www.host.com/url".
-
-
-
- =cut
-
-
-
- sub crawl {
-
- my ($START, $MAX_DEPTH, $hrtrack, $hrin)=@_;
-
- my (%hout, %jar);
-
- my ($T, @ST, @links, @tlinks, @vals, @ERRORS)=('');
-
-
-
- return if(!(defined $hrtrack && ref($hrtrack)));
-
- return if(!(defined $hrin && ref($hrin) ));
-
- return if(!defined $START || length($START)==0);
-
-
-
- $MAX_DEPTH||=2;
-
-
-
- # $ST[0]=HOST $ST[1]=URL $ST[2]=CWD $ST[3]=HTTPS $ST[4]=SERVER
-
- # $ST[5]=PORT $ST[6]=DEPTH
-
-
-
- @vals=utils_split_uri($START);
-
- $ST[1]=$vals[0]; # uri
-
- $ST[0]=$vals[2]; # host
-
- $ST[5]=$vals[3]; # port
-
- $ST[4]=undef; # server tag
-
-
-
- return if($ST[0] eq '');
-
-
-
- # some various informationz...
-
- $LW::crawl_config{'host'}=$ST[0];
-
- $LW::crawl_config{'port'}=$ST[5];
-
- $LW::crawl_config{'start'}=$ST[1];
-
-
-
- $$hrin{'whisker'}->{'host'}=$ST[0];
-
- $$hrin{'whisker'}->{'port'}=$ST[5];
-
- $$hrin{'whisker'}->{'lowercase_incoming_headers'}=1; # makes life easier
-
-
-
- http_fixup_request($hrin);
-
-
-
- # this is so callbacks can access internals via references
-
- $LW::crawl_config{'ref_links'}=\@links;
-
- $LW::crawl_config{'ref_jar'}=\%jar;
-
- $LW::crawl_config{'ref_hin'}=$hrin;
-
- $LW::crawl_config{'ref_hout'}=\%hout;
-
-
-
- %LW::crawl_referrers=(); # empty out existing referrers
-
- %LW::crawl_server_tags=();
-
- %LW::crawl_offsites=();
-
- %LW::crawl_cookies=();
-
- %LW::crawl_forms=();
-
-
-
- push @links, \@{[$ST[1],1,($vals[1] eq 'https')?1:0]};
-
-
-
- while(@links){
-
- my $C=shift @links;
-
- $ST[1]=$C->[0]; # url
-
- $ST[6]=$C->[1]; # depth
-
- $ST[3]=$C->[2]; # https
-
-
-
- next if(defined $$hrtrack{$ST[1]} && $$hrtrack{$ST[1]} ne '?');
-
-
-
- if($ST[6] > $MAX_DEPTH){
-
- $$hrtrack{$ST[1]}='?' if($LW::crawl_config{'save_skipped'}>0);
-
- next;
-
- }
-
-
-
- $ST[2]=utils_get_dir($ST[1]);
-
-
-
- $$hrin{'whisker'}->{'uri'}=$ST[1];
-
- $$hrin{'whisker'}->{'ssl'}=$ST[3];
-
- my $result = crawl_do_request($hrin,\%hout);
-
- if($result==1 || $result==2){
-
- push @ERRORS, "Error on making request for '$ST[1]': $hout{'whisker'}->{'error'}";
-
- next;
-
- }
-
-
-
- if($result==0 || $result==4){
-
- $$hrtrack{$ST[1]}=$hout{'whisker'}->{'http_resp'}; }
-
-
-
- if($result==3 || $result==5){
-
- $$hrtrack{$ST[1]}='?' if($LW::crawl_config{'save_skipped'}>0); }
-
-
-
- if(defined $hout{'server'}){
-
- if(!defined $ST[4]){ # server tag
-
- $ST[4]=$hout{'server'}; }
-
- $LW::crawl_server_tags{$hout{'server'}}++;
-
- }
-
-
-
- if(defined $hout{'set-cookie'}){
-
- if($LW::crawl_config{'save_cookies'}>0){
-
- if(ref($hout{'set-cookie'})){
-
- foreach (@{$hout{'set-cookie'}}){
-
- $LW::crawl_cookies{$_}++; }
-
- } else {
-
- $LW::crawl_cookies{$hout{'set-cookie'}}++;
-
- } }
-
-
-
- if($LW::crawl_config{'reuse_cookies'}>0){
-
- cookie_read(\%jar,\%hout); }
-
- }
-
-
-
-
-
- next if($result==4 || $result==5);
-
- next if(scalar @links > $LW::crawl_config{'url_limit'});
-
-
-
- if($result==0){ # page should be parsed
-
- if($LW::crawl_config{'source_callback'} != 0 &&
-
- ref($LW::crawl_config{'source_callback'})){
-
- &{$LW::crawl_config{'source_callback'}}($hrin,\%hout); }
-
-
-
- LW::html_find_tags(\$hout{'whisker'}->{'data'},
-
- \&crawl_extract_links_test);
-
- $LW::crawl_config{'stats_html'}++; # count how many pages we've parsed
-
- }
-
-
-
- if($result==3){ # follow the move via location header
-
- push @LW::crawl_urls, $hout{'location'}; }
-
-
-
- foreach $T (@LW::crawl_urls){
-
- $T=~tr/\0\r\n//d; # the NULL character is a bug that's somewhere
-
- next if (length($T)==0);
-
- next if ($T=~/^javascript:/i); # stupid javascript
-
- next if ($T=~/^mailto:/i);
-
- next if ($T=~m#^([a-zA-Z]*)://# && lc($1) ne 'http' && lc($1) ne 'https');
-
- next if ($T=~/^#/i); # fragment
-
-
-
- if($LW::crawl_config{'callback'} != 0){
-
- next if &{$LW::crawl_config{'callback'}}($T,@ST); }
-
-
-
- push(@{$LW::crawl_referrers{$T}}, $ST[1])
-
- if( $LW::crawl_config{'save_referrers'}>0 );
-
-
-
- $T=utils_absolute_uri($T,$ST[1],1) if($LW::crawl_config{'normalize_uri'}>0);
-
- @vals=utils_split_uri($T);
-
-
-
- # slashdot bug: workaround for the following fsck'd html code:
-
- # <FORM ACTION="//slashdot.org/users.pl" METHOD="GET">
-
- if($LW::crawl_config{'slashdot_bug'} > 0 &&
-
- substr($vals[0],0,2) eq '//'){
-
- if($ST[3]==1){ $T='https:'.$T;
-
- } else { $T='http:' .$T; }
-
- @vals=utils_split_uri($T);
-
- }
-
-
-
- # make sure URL is on same host, port, and protocol
-
- if( (defined $vals[2] && $vals[2] ne $ST[0]) ||
-
- (defined $vals[3] && $vals[3] != $ST[5]) ||
-
- (defined $vals[1] && ($vals[1] ne 'http'
-
- && $vals[1] ne 'https'))){
-
- if($LW::crawl_config{'save_offsites'}>0){
-
- $LW::crawl_offsites{utils_join_uri(@vals)}++; }
-
- next; }
-
-
-
- if(substr($vals[0],0,1) ne '/'){
-
- $vals[0]=$ST[2].$vals[0]; }
-
-
-
- my $where=rindex($vals[0],'.');
-
- my $EXT='';
-
- if($where >= 0){
-
- $EXT = substr($vals[0], $where+1, length($vals[0])-$where); }
-
-
-
- $EXT=~tr/0-9a-zA-Z//cd; # yucky chars will puke regex below
-
-
-
- if($EXT ne '' && $LW::crawl_config{'skip_ext'}=~/\.$EXT /i){
-
- if($LW::crawl_config{'save_skipped'}>0){
-
- $$hrtrack{$vals[0]}='?'; }
-
- next; }
-
-
-
- if(defined $vals[4] && $LW::crawl_config{'use_params'}>0){
-
- if($LW::crawl_config{'params_double_record'}>0 &&
-
- !defined $$hrtrack{$vals[0]}){
-
- $$hrtrack{$vals[0]}='?'; }
-
- $vals[0]=$vals[0].'?'.$vals[4];
-
- }
-
-
-
- next if(defined $$hrtrack{$vals[0]});
-
-
-
- push @links, \@{[$vals[0],$ST[6]+1, ($vals[1] eq 'https')?1:0]};
-
-
-
- } # foreach
-
-
-
- @LW::crawl_urls=(); # reset for next round
-
- } # while
-
-
-
- my $key;
-
- foreach $key (keys %LW::crawl_config){
-
- delete $LW::crawl_config{$key} if (substr($key,0,4) eq 'ref_');}
-
-
-
- $LW::crawl_config{'stats_reqs'}=$hout{'whisker'}->{'stats_reqs'};
-
- $LW::crawl_config{'stats_syns'}=$hout{'whisker'}->{'stats_syns'};
-
-
-
- } # end sub crawl
-
-
-
- #####################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::crawl_get_config
-
-
-
- Params: $config_directive
-
- Return: $config_directive_value
-
-
-
- Returns the set value of the submitted config_directive. See
-
- LW::crawl_set_config() for a list of configuration values.
-
-
-
- =cut
-
-
-
- sub crawl_get_config {
-
- my $key=shift;
-
- return $LW::crawl_config{$key};
-
- }
-
-
-
- #####################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::crawl_set_config
-
-
-
- Params: $config_directive, $value
-
- Return: Nothing
-
-
-
- This function adjusts the configuration of the crawl package. Use values
-
- of 0 and 1 for off and on, respectively. The defaults are set in
-
- libs/globals.wpl.
-
-
-
- save_cookies
-
- - crawl will save all cookies encountered, for later review
-
-
-
- save_offsite_urls
-
- - crawl will save all offsite URLs (URLs not on this host); crawl
-
- will not actually crawl those hosts (use separate calls to crawl)
-
-
-
- follow_moves
-
- - crawl will follow the URL received from an HTTP move response
-
-
-
- use_params
-
- - crawl will factor in URI parameters when considering if a URI is unique
-
- or not
-
-
-
- params_double_record
-
- - if both use_params and params_double_record are set, crawl will make two
-
- entries for each URI which has paramaters: one with and one without the
-
- parameters
-
-
-
- reuse_cookies
-
- - crawl will resubmit any received/prior cookies
-
-
-
- skip_ext
-
- - crawl will ignore requests for URLs ending in extensions given; the
-
- value requires a specific string format: (dot)extension(space). For
-
- example, to ignore GIFs and JPGs, you would run:
-
- LW::crawl_set_config('skip_ext',".gif .jpg ");
-
-
-
- save_skipped
-
- - any URLs that are skipped via skip_ext, or are above the specified DEPTH
-
- will be recorded in the tracking hash with a value of '?' (instead of an
-
- HTTP response code).
-
-
-
- callback
-
- - crawl will call this function (if this is a reference to a function),
-
- passing it the current URI and the @ST array (which has host, port, SSL,
-
- etc info). If the function returns a TRUE value, then crawl will skip
-
- that URI. Set to value 0 (zero) if you do not want to use a callback.
-
-
-
- slashdot_bug
-
- - slashdot.org uses a screwy piece of invalid (yet it works) HTML in
-
- the form of <FORM ACTION="//slashdot.org/somefile">. So basically,
-
- when a URL starts with '//' and slashdot_bug is set to 1 (which it
-
- is by default), then the proper 'http:' or 'https:' will be prepended
-
- to the URL.
-
-
-
- source_callback
-
- - crawl will call this function (if this is a reference to a function),
-
- passing references to %hin and %hout, right before it parses the page
-
- for HTML links. This allows the callback function to review or
-
- modify the HTML before it's parsed for links. Return value is ignored.
-
-
-
- url_limit
-
- - number or URLs that crawl will queue up at one time; defaults to 1000
-
-
-
- do_head
-
- - use head requests to determine if a file has a content-type worth
-
- downloading. Potentially saves some time, assuming the server properly
-
- supports HEAD requests. Set to value 1 to use (0/off by default).
-
-
-
-
-
- =cut
-
-
-
- sub crawl_set_config {
-
- return if(!defined $_[0]);
-
- my %opts=@_;
-
- while( my($k,$v)=each %opts){
-
- $LW::crawl_config{lc($k)}=$v; }
-
- }
-
-
-
- #####################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::crawl_extract_links_test (INTERNAL)
-
-
-
- Params: $TAG, \%elements, \$html_data, $offset, $len
-
- Return: nothing
-
-
-
- This is the callback function used by the crawl function, and passed to
-
- html_find_tags. It will find URL/URI links and place them in
-
- @LW::crawl_urls.
-
-
-
- =cut
-
-
-
- sub crawl_extract_links_test {
-
- my ($TAG, $hr, $dr, $start, $len)=(lc(shift),@_);
-
- my $t;
-
-
-
- # this should be most of the time...
-
- return undef if(!defined ($t=$LW::crawl_linktags{$TAG}));
-
- return undef if(!scalar %$hr); # fastpath quickie
-
-
-
- while( my ($key,$val)= each %$hr){ # normalize element values
-
- $$hr{lc($key)} = $val;
-
- }
-
-
-
- if(ref($t)){
-
- foreach (@$t){
-
- push(@LW::crawl_urls,$$hr{$_}) if(defined $$hr{$_});
-
- }
-
- } else {
-
- push(@LW::crawl_urls,$$hr{$t}) if(defined $$hr{$t});
-
- }
-
-
-
- if($TAG eq 'form' && defined $$hr{action}){
-
- my $u=$LW::crawl_config{'ref_hout'}->{'whisker'}->{'uri'};
-
- $LW::crawl_forms{utils_absolute_uri($$hr{action},$u,1)}++;
-
- }
-
-
-
- return undef;
-
- }
-
-
-
- ################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::crawl_make_request (INTERNAL)
-
-
-
- Params: \%hin, \%hout
-
- Return: $status_code
-
-
-
- This is an internal function used by LW::crawl(), and is responsible for
-
- making HTTP requests, including any HEAD pre-requests and following move
-
- responses. Status codes are:
-
- 0 Success
-
- 1 Error during request
-
- 2 Error on connection setup
-
- 3 Move request; follow Location header
-
- 4 File not of text/htm(l) type
-
- 5 File not available
-
-
-
- =cut
-
-
-
- sub crawl_do_request {
-
- my ($hrin,$hrout) = @_;
-
- my $ret;
-
-
-
- if($LW::crawl_config{'do_head'}){
-
- my $save=$$hrin{'whisker'}->{'method'};
-
- $$hrin{'whisker'}->{'method'}='HEAD';
-
- $ret=http_do_request($hrin,$hrout);
-
- $$hrin{'whisker'}->{'method'}=$save;
-
-
-
- return 2 if($ret==2); # if there was connection error, do not continue
-
- if($ret==0){ # successful request
-
- if($$hrout{'whisker'}->{'http_resp'}==501){ # HEAD not allowed
-
- $LW::crawl_config{'do_head'}=0; # no more HEAD requests
-
- }
-
-
-
- if($$hrout{'whisker'}->{'http_resp'} <308 &&
-
- $$hrout{'whisker'}->{'http_resp'} >300){
-
- if($LW::crawl_config{'follow_moves'} >0){
-
- return 3 if(defined $$hrout{'location'}); }
-
- return 5; # not avail
-
- }
-
-
-
- if($$hrout{'whisker'}->{'http_resp'}==200){
-
- # no content-type is treated as text/htm
-
- if(defined $$hrout{'content-type'} &&
-
- $$hrout{'content-type'}!~/^text\/htm/i){
-
- return 4;
-
- }
-
- # fall through to GET request below
-
- }
-
- }
-
- # request errors are essentially redone via GET, below
-
- }
-
-
-
- return http_do_request($hrin,$hrout);
-
- }
-
-
-
- #####################################################
-
-
-
- =pod
-
-
-
- =head1 ++ Sub package: dump
-
-
-
- The dump subpackage contains various utility functions which emulate
-
- the basic functionality provided by Data::Dumper.
-
-
-
- =cut
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::dumper
-
-
-
- Params: $name, \@array [, $name, \%hash, $name, \$scalar ]
-
- Return: $code, undef on error
-
-
-
- The dumper function will take the given $name and data reference, and
-
- will create an ASCII perl code representation suitable for eval'ing
-
- later to recreate the same structure. $name is the name of the variable
-
- that it will be saved as. Example:
-
-
-
- $output = LW::dumper('hin',\%hin);
-
-
-
- NOTE: dumper() creates anonymous structures under the name given. For
-
- example, if you dump the hash %hin under the name 'hin', then when you
-
- eval the dumped code you will need to use %$hin, since $hin is now a
-
- *reference* to a hash.
-
-
-
- =cut
-
-
-
- sub dumper {
-
- my %what=@_;
-
- my ($final,$k,$v)=('');
-
- while( ($k,$v)=each %what){
-
- return undef if(ref($k) || !ref($v));
-
- $final.="\$$k = "._dump(1,$v,1);
-
- $final=~s#,\n$##;
-
- $final.=";\n"; }
-
- return $final;
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::dumper_writefile
-
-
-
- Params: $file, $name, \@array [, $name, \%hash, $name, \@scalar ]
-
- Return: 0 if success; 1 if error
-
-
-
- This calls dumper() and saves the output to the specified $file.
-
-
-
- Note: LW does not checking on the validity of the file name, it's
-
- creation, or anything of the sort. Files are opened in overwrite
-
- mode.
-
-
-
- =cut
-
-
-
- sub dumper_writefile {
-
- my $file=shift;
-
- my $output=dumper(@_);
-
- return 1 if(!open(OUT,">$file") || $output eq 'ERROR');
-
- print OUT $output;
-
- close(OUT);
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::_dump (INTERNAL)
-
-
-
- Params: $tabs, $ref
-
- Return: $output
-
-
-
- This is an internal function to dumper() which will dereference all
-
- elements and produce the resulting code.
-
-
-
- This function is not intended for external use.
-
-
-
- =cut
-
-
-
- sub _dump { # dereference and dump an element
-
- my ($t, $ref, $depth)=@_;
-
- my ($out,$k,$v)=('');
-
- $depth||=1;
-
-
-
- # to protect against circular loops
-
- return 'undef' if($depth > 128);
-
-
-
- if(!defined $ref){
-
- return 'undef';
-
- } elsif(ref($ref) eq 'HASH'){
-
- $out.="{\n";
-
- while( ($k,$v)=each %$ref){
-
- next if($k eq '');
-
- $out.= "\t"x$t;
-
- $out.=_dumpd($k).' => ';
-
- if(ref($v)){ $out.=_dump($t+1,$v,$depth+1); }
-
- else { $out.=_dumpd($v); }
-
- $out.=",\n" unless( substr($out,-2,2) eq ",\n");
-
- }
-
- $out=~s#,\n$#\n#;
-
- $out.="\t"x($t-1);
-
- $out.="},\n";
-
- } elsif(ref($ref) eq 'ARRAY'){
-
- $out.="[";
-
- if(~~@$ref){
-
- $out.="\n";
-
- foreach $v (@$ref) {
-
- $out.= "\t"x$t;
-
- if(ref($v)){ $out.=_dump($t+1,$v,$depth+1); }
-
- else { $out.=_dumpd($v); }
-
- $out.=",\n" unless( substr($out,-2,2) eq ",\n");
-
- }
-
- $out=~s#,\n$#\n#;
-
- $out.="\t"x($t-1);
-
- }
-
- $out.="],\n";
-
- } elsif(ref($ref) eq 'SCALAR'){
-
- $out.=_dumpd($$ref);
-
- } elsif(ref($ref) eq 'REF'){
-
- $out.=_dump($t,$$ref,$depth+1);
-
- } elsif(ref($ref)){ # unknown/unsupported ref
-
- $out.="undef";
-
- } else { # normal scalar
-
- $out.=_dumpd($ref);
-
- }
-
- return $out;
-
- }
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::_dumpd (INTERNAL)
-
-
-
- Params: $string
-
- Return: $escaped_string
-
-
-
- This is an internal function to dumper() which will escape the given
-
- string to make it suitable for printing.
-
-
-
- This function is not intended for external use.
-
-
-
- =cut
-
-
-
- sub _dumpd { # escape a scalar string
-
- my $v=shift;
-
- return 'undef' if(!defined $v);
-
- return "''" if($v eq '');
-
- return "$v" if($v!~tr/0-9//c);
-
- return "'$v'" if($v!~tr/ !-~//c);
-
- $v=~s#\\#\\\\#g; $v=~s#"#\\"#g;
-
- $v=~s#\r#\\r#g; $v=~s#\n#\\n#g;
-
- $v=~s#\0#\\0#g; $v=~s#\t#\\t#g;
-
- $v=~s#([^!-~ ])#sprintf('\\x%02x',ord($1))#eg;
-
- return "\"$v\"";
-
- }
-
-
-
- ########################################################################
-
- =pod
-
-
-
- =head1 ++ Sub package: easy
-
-
-
- The 'easy' subpackage contains many high-level/simple functions to
-
- do basic web tasks. This should make it easier to use libwhisker
-
- to do basic tasks.
-
-
-
- =cut
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::get_page
-
-
-
- Params: $url [, \%hin_request]
-
- Return: $code, $data ($code will be set to undef on error, $data will
-
- contain error message)
-
-
-
- This function will fetch the page at the given URL, and return the HTTP response code
-
- and page contents. Use this in the form of:
-
- ($code,$html)=LW::get_page("http://host.com/page.html")
-
-
-
- The optional %hin_request will be used if supplied. This allows you to set
-
- headers and other parameters.
-
-
-
- =cut
-
-
-
- sub get_page {
-
- my ($URL,$hr)=(shift,shift);
-
- return (undef,"No URL supplied") if(length($URL)==0);
-
-
-
- my (%req,%resp);
-
- my $rptr;
-
-
-
- if(defined $hr && ref($hr)){
-
- $rptr=$hr;
-
- } else {
-
- $rptr=\%req;
-
- LW::http_init_request(\%req);
-
- }
-
-
-
- LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax
-
- LW::http_fixup_request($rptr);
-
-
-
- if(http_do_request($rptr,\%resp)){
-
- return (undef,$resp{'whisker'}->{'error'});
-
- }
-
-
-
- return ($resp{'whisker'}->{'code'}, $resp{'whisker'}->{'data'});
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::get_page_hash
-
-
-
- Params: $url [, \%hin_request]
-
- Return: $hash_ref (undef on no URL)
-
-
-
- This function will fetch the page at the given URL, and return the whisker
-
- HTTP response hash. The return code of the function is set to
-
- $hash_ref->{whisker}->{get_page_hash}, and uses the LW::http_do_request()
-
- response values.
-
-
-
- Note: undef is returned if no URL is supplied
-
-
-
- =cut
-
-
-
- sub get_page_hash {
-
- my ($URL,$hr)=(shift,shift);
-
- return undef if(length($URL)==0);
-
-
-
- my (%req,%resp);
-
- my $rptr;
-
-
-
- if(defined $hr && ref($hr)){
-
- $rptr=$hr;
-
- } else {
-
- $rptr=\%req;
-
- LW::http_init_request(\%req);
-
- }
-
-
-
- LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax
-
- LW::http_fixup_request($rptr);
-
-
-
- my $r=http_do_request($rptr,\%resp);
-
- $resp{whisker}->{get_page_hash}=$r;
-
-
-
- return \%resp;
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::get_page_to_file
-
-
-
- Params: $url, $filepath [, \%hin_request]
-
- Return: $code ($code will be set to undef on error)
-
-
-
- This function will fetch the page at the given URL, place the resulting HTML
-
- in the file specified, and return the HTTP response code. The optional
-
- %hin_request hash sets the default parameters to be used in the request.
-
-
-
- NOTE: libwhisker does not do any file checking; libwhisker will open the
-
- supplied filepath for writing, overwriting any previously-existing files.
-
- Libwhisker does not differentiate between a bad request, and a bad file
-
- open. If you're having troubles making this function work, make sure
-
- that your $filepath is legal and valid, and that you have appropriate
-
- write permissions to create/overwrite that file.
-
-
-
- =cut
-
-
-
- sub get_page_to_file {
-
- my ($URL, $filepath, $hr)=@_;
-
-
-
- return undef if(length($URL)==0);
-
- return undef if(length($filepath)==0);
-
-
-
- my (%req,%resp);
-
- my $rptr;
-
-
-
- if(defined $hr && ref($hr)){
-
- $rptr=$hr;
-
- } else {
-
- $rptr=\%req;
-
- LW::http_init_request(\%req);
-
- }
-
-
-
- LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax
-
- LW::http_fixup_request($rptr);
-
-
-
- if(http_do_request($rptr,\%resp)){
-
- return undef;
-
- }
-
- open(OUT,">$filepath") || return undef;
-
- binmode(OUT); # stupid Windows
-
- print OUT $resp{'whisker'}->{'data'};
-
- close(OUT);
-
-
-
- return $resp{'whisker'}->{'code'};
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::upload_file
-
-
-
- Params: $url, $filepath, $paramname [, \%hin_request]
-
- Return: $code ($code will be set to undef on error)
-
-
-
- This function will upload the specified $file to the given $url as
-
- the parameter named $paramname via a multipart POST request. The
-
- optional $hin_request hash lets you set any other particular request
-
- parameters.
-
-
-
- NOTE: this is a highly simplied function for basic uploads. If you
-
- need to do more advanced things like set other multipart form
-
- parameters, send multiple files, etc, then you will need to use the
-
- normal API to do it yourself.
-
-
-
- =cut
-
-
-
- sub upload_file {
-
- my ($URL, $filepath, $paramname, $hr)=@_;
-
-
-
- return undef if(length($URL) ==0);
-
- return undef if(length($filepath) ==0);
-
- return undef if(length($paramname)==0);
-
- return undef if(!(-e $filepath && -f $filepath));
-
-
-
- my (%req,%resp,%multi);
-
- my $rptr;
-
-
-
- if(defined $hr && ref($hr)){
-
- $rptr=$hr;
-
- } else {
-
- $rptr=\%req;
-
- LW::http_init_request(\%req);
-
- }
-
-
-
- LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax
-
- $rptr{'whisker'}->{'method'}='POST';
-
- LW::http_fixup_request($rptr);
-
-
-
- LW::multipart_setfile(\%multi,$filepath,$paramname);
-
- LW::multipart_write(\%multi,$rptr);
-
-
-
- if(http_do_request($rptr,\%resp)){
-
- return undef;
-
- }
-
-
-
- return $resp{'whisker'}->{'code'};
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::download_file
-
-
-
- Params: $url, $filepath [, \%hin_request]
-
- Return: $code ($code will be set to undef on error)
-
-
-
- LW::download_file is just an alias for LW::get_page_to_file().
-
-
-
- =cut
-
-
-
- sub download_file {
-
- goto &LW::get_page_to_file;
-
- }
-
-
-
- ########################################################################
-
-
-
-
-
- =pod
-
-
-
- =head1 ++ Sub package: encode
-
-
-
- Various type encodings. Installing MIME::Base64 will result in a
-
- compiled C version of base64 functions, which means they will be tons
-
- faster. This is useful if you're going to run a Basic authentication
-
- brute force, which requires a high processing speed. However, it's not
-
- required, since I include a Perl version, which is slower.
-
-
-
- =cut
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::encode_base64
-
-
-
- Params: $data, $eol
-
- Return: $base64_encoded_data
-
-
-
- LW::encode_base64 is a stub function which will choose the fastest
-
- function available for doing base64 encoding. This is done by checking to
-
- see if the MIME::Base64 perl module is available (which uses fast C
-
- routines). If it's not, then it defaults to a perl version (which is
-
- slower). You can call the perl version direct, but I suggest using the
-
- stub to gain speed advantages where possible.
-
-
-
- =cut
-
-
-
- #sub encode_base64;
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::decode_base64
-
-
-
- Params: $data
-
- Return: $base64_decoded_data
-
-
-
- LW::decode_base64 is a stub function which will choose the fastest
-
- function available for doing base64 decoding. This is done by checking to
-
- see if the MIME::Base64 perl module is available (which uses fast C
-
- routines). If it's not, then it defaults to a perl version (which is
-
- slower). You can call the perl version direct, but I suggest using the
-
- stub to gain speed advantages where possible.
-
-
-
- =cut
-
-
-
- #sub decode_base64;
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::encode_base64_perl
-
-
-
- Params: $data, $eol
-
- Return: $b64_encoded_data
-
-
-
- A perl implementation of base64 encoding. I recommend you use
-
- LW::encode_base64 instead, since it may use the MIME::Base64 module (if
-
- available), which lead to speed advantages. The perl code for this
-
- function was actually taken from an older MIME::Base64 perl module, and
-
- bears the following copyright:
-
-
-
- Copyright 1995-1999 Gisle Aas <gisle@aas.no>
-
-
-
- NOTE: the $eol parameter will be inserted every 76 characters. This is
-
- used to format the data for output on a 80 character wide terminal.
-
-
-
- =cut
-
-
-
- sub encode_base64_perl { # ripped from MIME::Base64
-
- my $res = "";
-
- my $eol = $_[1];
-
- $eol = "\n" unless defined $eol;
-
- pos($_[0]) = 0;
-
- while ($_[0] =~ /(.{1,45})/gs) {
-
- $res .= substr(pack('u', $1), 1);
-
- chop($res);}
-
- $res =~ tr|` -_|AA-Za-z0-9+/|;
-
- my $padding = (3 - length($_[0]) % 3) % 3;
-
- $res =~ s/.{$padding}$/'=' x $padding/e if $padding;
-
- if (length $eol) {
-
- $res =~ s/(.{1,76})/$1$eol/g;
-
- } $res; }
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::decode_base64_perl
-
-
-
- Params: $data
-
- Return: $b64_decoded_data
-
-
-
- A perl implementation of base64 decoding. The perl code for this function
-
- was actually taken from an older MIME::Base64 perl module, and bears the
-
- following copyright:
-
-
-
- Copyright 1995-1999 Gisle Aas <gisle@aas.no>
-
-
-
- =cut
-
-
-
- sub decode_base64_perl { # ripped from MIME::Base64
-
- my $str = shift;
-
- my $res = "";
-
- $str =~ tr|A-Za-z0-9+=/||cd;
-
- $str =~ s/=+$//; # remove padding
-
- $str =~ tr|A-Za-z0-9+/| -_|; # convert to uuencoded format
-
- while ($str =~ /(.{1,60})/gs) {
-
- my $len = chr(32 + length($1)*3/4); # compute length byte
-
- $res .= unpack("u", $len . $1 ); # uudecode
-
- }$res;}
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::encode_str2uri
-
-
-
- Params: $data
-
- Return: $result
-
-
-
- This function encodes every character (except the / character) with normal
-
- URL hex encoding.
-
-
-
- =cut
-
-
-
- sub encode_str2uri { # normal hex encoding
-
- my $str=shift;
-
- $str=~s/([^\/])/sprintf("%%%02x",ord($1))/ge;
-
- return $str;}
-
-
-
-
-
- #########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::encode_str2ruri
-
-
-
- Params: $data
-
- Return: $result
-
-
-
- This function randomly encodes characters (except the / character) with
-
- normal URL hex encoding.
-
-
-
- =cut
-
-
-
- sub encode_str2ruri { # random normal hex encoding
-
- my @T=split(//,shift);
-
- my $s;
-
- foreach (@T) {
-
- if(m#;=:&@\?#){
-
- $s.=$_;
-
- next;
-
- }
-
- if((rand()*2)%2 == 1){ $s.=sprintf("%%%02x",ord($_)) ;
-
- }else{ $s.=$_; }
-
- }
-
- return $s;
-
- }
-
-
-
- #########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::encode_unicode
-
-
-
- Params: $data
-
- Return: $result
-
-
-
- This function converts a normal string into Windows unicode format.
-
-
-
- =cut
-
-
-
- sub encode_unicode
-
- {
-
- my $r='';
-
- foreach $c (split(//,shift)){
-
- $r.=pack("v",ord($c));
-
- }
-
- return $r;
-
- }
-
-
-
- #########################################################################
-
- =pod
-
-
-
- =head1 ++ Sub package: forms
-
-
-
- This subpackage contains various routines to parse and handle HTML forms.
-
- The goal is to parse the variable, human-readable HTML into concrete
-
- structures useable by your program. The forms package does do a good job
-
- at making these structures, but I will admit: they are not exactly simple,
-
- and thus not a cinch to work with. But then again, representing something
-
- as complex as a HTML form is not a simple thing either. I think the
-
- results are acceptable for what's trying to be done. Anyways...
-
-
-
- Forms are stored in perl hashes, with elements in the following format:
-
-
-
- $form{'element_name'}=@([ 'type', 'value', @params ])
-
-
-
- Thus every element in the hash is an array of anonymous arrays. The first
-
- array value contains the element type (which is 'select', 'textarea',
-
- 'button', or an 'input' value of the form 'input-text', 'input-hidden',
-
- 'input-radio', etc).
-
-
-
- The second value is the value, if applicable (it could be undef if no
-
- value was specified). Note that select elements will always have an undef
-
- value--the actual values are in the subsequent options elements.
-
-
-
- The third value, if defined, is an anonymous array of additional tag
-
- parameters found in the element (like 'onchange="blah"', 'size="20"',
-
- 'maxlength="40"', 'selected', etc).
-
-
-
- The array does contain one special element, which is stored in the hash
-
- under a NULL character ("\0") key. This element is of the format:
-
-
-
- $form{"\0"}=['name', 'method', 'action', @parameters];
-
-
-
- The element is an anonymous array that contains strings of the form's
-
- name, method, and action (values can be undef), and a @parameters array
-
- similar to that found in normal elements (above).
-
-
-
- Accessing individual values stored in the form hash becomes a test of your
-
- perl referencing skills. Hint: to access the 'value' of the third element
-
- named 'choices', you would need to do:
-
-
-
- $form{'choices'}->[2]->[1];
-
-
-
- The '[2]' is the third element (normal array starts with 0), and the
-
- actual value is '[1]' (the type is '[0]', and the parameter array is
-
- '[2]').
-
-
-
- =cut
-
-
-
- ################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::forms_read
-
-
-
- Params: \$html_data
-
- Return: @found_forms
-
-
-
- This function parses the given $html_data into libwhisker form hashes.
-
- It returns an array of hash references to the found forms.
-
-
-
- =cut
-
-
-
- sub forms_read {
-
- my $dr=shift;
-
- return undef if(!ref($dr) || length($$dr)==0);
-
-
-
- @LW::forms_found=();
-
- LW::html_find_tags($dr,\&forms_parse_callback);
-
-
-
- if(scalar %LW::forms_current){
-
- my %DUP=%LW::forms_current;
-
- push(@LW::forms_found,\%DUP);
-
- }
-
- return @LW::forms_found;
-
- }
-
-
-
- ################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::forms_write
-
-
-
- Params: \%form_hash
-
- Return: $html_of_form [undef on error]
-
-
-
- This function will take the given %form hash and compose a generic HTML
-
- representation of it, formatted with tabs and newlines in order to make it
-
- neat and tidy for printing.
-
-
-
- Note: this function does *not* escape any special characters that were
-
- embedded in the element values.
-
-
-
- =cut
-
-
-
- sub forms_write {
-
- my $hr=shift;
-
- return undef if(!ref($hr) || !(scalar %$hr));
-
- return undef if(!defined $$hr{"\0"});
-
-
-
- my $t='<form name="'.$$hr{"\0"}->[0].'" method="';
-
- $t.=$$hr{"\0"}->[1].'" action="'.$$hr{"\0"}->[2].'"';
-
- if(defined $$hr{"\0"}->[3]){
-
- $t.=' '.join(' ',@{$$hr{"\0"}->[3]}); }
-
- $t.=">\n";
-
-
-
- while( my($name,$ar)=each(%$hr) ){
-
- next if($name eq "\0");
-
- foreach $a (@$ar){
-
- my $P='';
-
- $P=' '.join(' ', @{$$a[2]}) if(defined $$a[2]);
-
- $t.="\t";
-
-
-
- if($$a[0] eq 'textarea'){
-
- $t.="<textarea name=\"$name\"$P>$$a[1]";
-
- $t.="</textarea>\n";
-
-
-
- } elsif($$a[0]=~m/^input-(.+)$/){
-
- $t.="<input type=\"$1\" name=\"$name\" ";
-
- $t.="value=\"$$a[1]\"$P>\n";
-
-
-
- } elsif($$a[0] eq 'option'){
-
- $t.="\t<option value=\"$$a[1]\"$P>$$a[1]\n";
-
-
-
- } elsif($$a[0] eq 'select'){
-
- $t.="<select name=\"$name\"$P>\n";
-
-
-
- } elsif($$a[0] eq '/select'){
-
- $t.="</select$P>\n";
-
-
-
- } else { # button
-
- $t.="<button name=\"$name\" value=\"$$a[1]\">\n";
-
- }
-
- }
-
- }
-
-
-
- $t.="</form>\n";
-
- return $t;
-
- }
-
-
-
- ################################################################
-
-
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::forms_parse_html (INTERNAL)
-
-
-
- Params: $TAG, \%elements, \$html_data, $offset, $len
-
- Return: nothing
-
-
-
- This is an &html_find_tags callback used to parse HTML into form hashes.
-
- You should not call this directly, but instead use &LW::forms_read.
-
-
-
- =cut
-
-
-
- { # these are private static variables for &forms_parse_html
-
- %FORMS_ELEMENTS=( 'form'=>1, 'input'=>1,
-
- 'textarea'=>1, 'button'=>1,
-
- 'select'=>1, 'option'=>1,
-
- '/select'=>1 );
-
- $CURRENT_SELECT=undef;
-
- $UNKNOWNS=0;
-
-
-
- sub forms_parse_callback {
-
- my ($TAG, $hr, $dr, $start, $len)=(lc(shift),@_);
-
- my ($saveparam, $parr, $key)=(0,undef,'');
-
-
-
- # fastpath shortcut
-
- return undef if(!defined $FORMS_ELEMENTS{$TAG});
-
- LW::utils_lowercase_hashkeys($hr) if(scalar %$hr);
-
-
-
- if($TAG eq 'form'){
-
-
-
- if(scalar %LW::forms_current){ # save last form
-
- my %DUP=%LW::forms_current;
-
- push (@LW::forms_found, \%DUP);
-
- %LW::forms_current=();
-
- }
-
-
-
- $LW::forms_current{"\0"}=[$$hr{name},$$hr{method},
-
- $$hr{action},undef];
-
- delete $$hr{'name'}; delete $$hr{'method'}; delete $$hr{'action'};
-
- $key="\0"; $parr=\@{$LW::forms_current{"\0"}};
-
- $UNKNOWNS=0;
-
-
-
- } elsif($TAG eq 'input'){
-
- $$hr{type}='text' if(!defined $$hr{type});
-
- $$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name});
-
- $key=$$hr{name};
-
-
-
- push( @{$LW::forms_current{$key}},
-
- (['input-'.$$hr{type},$$hr{value},undef]) );
-
- delete $$hr{'name'}; delete $$hr{'type'}; delete $$hr{'value'};
-
- $parr=\@{$LW::forms_current{$key}->[-1]};
-
-
-
- } elsif($TAG eq 'select'){
-
- $$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name});
-
- $key=$$hr{name};
-
- push( @{$LW::forms_current{$key}}, (['select',undef,undef]) );
-
- $parr=\@{$LW::forms_current{$key}->[-1]};
-
- $CURRENT_SELECT=$key;
-
- delete $$hr{name};
-
-
-
- } elsif($TAG eq '/select'){
-
- push( @{$LW::forms_current{$CURRENT_SELECT}},
-
- (['/select',undef,undef]) );
-
- $CURRENT_SELECT=undef;
-
- return undef;
-
-
-
- } elsif($TAG eq 'option'){
-
- return undef if(!defined $CURRENT_SELECT);
-
- if(!defined $$hr{value}){
-
- my $stop=index($$dr,'<',$start+$len);
-
- return undef if($stop==-1); # MAJOR PUKE
-
- $$hr{value}=substr($$dr,$start+$len,
-
- ($stop-$start-$len));
-
- $$hr{value}=~tr/\r\n//d;
-
- }
-
- push( @{$LW::forms_current{$CURRENT_SELECT}},
-
- (['option',$$hr{value},undef]) );
-
- delete $$hr{value};
-
- $parr=\@{$LW::forms_current{$CURRENT_SELECT}->[-1]};
-
-
-
- } elsif($TAG eq 'textarea'){
-
- my $stop=$start+$len;
-
- # find closing </textarea> tag
-
- do { $stop=index($$dr,'</',$stop+2);
-
- return undef if($stop==-1); # MAJOR PUKE
-
- } while( lc(substr($$dr,$stop+2,8)) ne 'textarea');
-
- $$hr{value}=substr($$dr,$start+$len,($stop-$start-$len));
-
-
-
- $$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name});
-
- $key=$$hr{name};
-
- push( @{$LW::forms_current{$key}},
-
- (['textarea',$$hr{value},undef]) );
-
- $parr=\@{$LW::forms_current{$key}->[-1]};
-
- delete $$hr{'name'}; delete $$hr{'value'};
-
-
-
- } else { # button
-
- $$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name});
-
- $key=$$hr{name};
-
- push( @{$LW::forms_current{$key}},
-
- (['button',$$hr{value},undef]) );
-
- }
-
-
-
- if(scalar %$hr){
-
- my @params=();
-
- foreach $k (keys %$hr){
-
- if(defined $$hr{$k}){
-
- push @params, "$k=\"$$hr{$k}\"";
-
- } else { push @params, $k; }
-
- }
-
- $$parr[2]=\@params;
-
- }
-
-
-
- return undef;
-
- }}
-
-
-
- =pod
-
-
-
- =head1 ++ Sub package: html
-
-
-
- The html sub package implements a simple HTML parser.
-
-
-
- =cut
-
-
-
- ################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::html_find_tags
-
-
-
- Params: \$data, \&callback_function [, $xml_flag]
-
- Return: nothing
-
-
-
- LW::html_find_tags parses a piece of HTML and 'extracts' all found tags,
-
- passing the info to the given callback function. The callback function
-
- must accept two parameters: the current tag (as a scalar), and a hash ref
-
- of all the tag's elements. For example, the tag <a href="/file"> will
-
- pass 'a' as the current tag, and a hash reference which contains
-
- {'href'}="/file".
-
-
-
- The xml_flag, when set, causes the parser to do some extra processing
-
- and checks to accomodate XML style tags such as <tag foo="bar"/>.
-
-
-
- Notice: this function is slow! And using it to rewrite (via passback) is
-
- slower! Make sure you have LW::bin installed to get the faster binary
-
- version.
-
-
-
- =cut
-
-
-
- sub html_find_tags {
-
- # use faster binary helper
-
- goto &LW::bin::html_find_tags
-
- if(defined $LW::available{'lw::bin'});
-
-
-
- my ($dataref, $callbackfunc, $xml)=@_;
-
-
-
- return if(!(defined $dataref && ref($dataref) ));
-
- return if(!(defined $callbackfunc && ref($callbackfunc)));
-
- $xml||=0;
-
-
-
- my ($CURTAG, $ELEMENT, $VALUE, $c, $cc);
-
- my ($INCOMMENT,$INTAG,$INSCRIPT,$INCDATA)=(0,0,0,0);
-
- my (%TAG, $ret, $start, $tagstart, $commstart, $scriptstart, $x);
-
-
-
- # YES, this looks like C. In fact, it's my C version ported to
-
- # perl. But it's faster and more dependable than any regex mess
-
- # someone could come up with.
-
- my $LEN = length($$dataref);
-
- for ($c=0; $c<$LEN; $c++){
-
-
-
- $cc=substr($$dataref,$c,1);
-
-
-
- next if(!$INCOMMENT && !$INTAG && !$INSCRIPT && $cc ne '>' && $cc ne '<');
-
-
-
- if($cc eq '<'){
-
- if($INSCRIPT){
-
- if(lc(substr($$dataref,$c+1,7)) eq '/script'){
-
- $INSCRIPT=0;
-
- $TAG{'='}=substr($$dataref, $scriptstart,
-
- $c - $scriptstart - 1);
-
- } else { next; }
-
- }
-
-
-
- next if($INCDATA); # skip tags in xml CDATA section
-
-
-
- if(substr($$dataref,$c+1,3) eq '!--'){
-
- $INCOMMENT=1; $commstart=$c; $c+=3;
-
-
-
- $INCDATA++ if($xml&&substr($$dataref,$c+1,8) eq '![CDATA[');
-
-
-
- } else {
-
- $INTAG=1; $c++;
-
- $c++ while(substr($$dataref,$c,1)=~tr/< \t\r\n//);
-
- $tagstart=$c-1;
-
-
-
- $CURTAG='';
-
- while(($x=substr($$dataref,$c,1))!~tr/ \t\r\n>=// &&
-
- $c < $LEN){
-
- $CURTAG.=$x; $c++;}
-
-
-
- chop $CURTAG if($xml && substr($CURTAG,-1,1) eq '/');
-
- $c++ if($x ne '>');
-
-
-
- $INSCRIPT=1 if($CURTAG eq 'script' && !$xml);
-
- }
-
- $cc=substr($$dataref,$c,1); # refresh current char (cc)
-
- }
-
-
-
- if($cc eq '>'){
-
- if($INSCRIPT){
-
- if($CURTAG eq 'script'){
-
- $scriptstart = $c + 1;
-
- } else { next; }
-
- }
-
-
-
- if($INCDATA && substr($$dataref,$c-2,2) eq ']]'){
-
- $INCDATA=0;
-
- next;
-
- }
-
-
-
- if(!$INCOMMENT && $INTAG){
-
- $INTAG=0;
-
- $TAG{'/'}++ if($xml&&substr($$dataref,$c-1,1) eq '/');
-
- $ret=&$callbackfunc($CURTAG,\%TAG, $dataref,
-
- $tagstart, $c-$tagstart+1);
-
- $c+=$ret if(defined $ret && $ret != 0);
-
- $CURTAG='';
-
- %TAG=();
-
- }
-
- if($INCOMMENT && substr($$dataref,$c-2,2) eq '--'){
-
- $INCOMMENT=0;
-
- $TAG{'='}=substr($$dataref,$commstart+4,
-
- $c-$commstart-3);
-
- $ret=&$callbackfunc('!--',\%TAG, $dataref,
-
- $commstart, $c-$commstart+1);
-
- $c+=$ret if(defined $ret && $ret != 0);
-
- delete $TAG{'='};
-
- next;
-
- }
-
- }
-
-
-
- next if($INCOMMENT);
-
-
-
- if($INTAG){
-
-
-
- $ELEMENT=''; $VALUE='';
-
-
-
- # eat whitespace
-
- $c++ while(substr($$dataref,$c,1)=~tr/ \t\r\n//);
-
-
-
- $start=$c;
-
- $c++ while(substr($$dataref,$c,1)!~tr/ \t\r\n=\>// && $c<$LEN);
-
-
-
- if($c > $start){
-
- $ELEMENT=substr($$dataref,$start,$c-$start);
-
- chop $ELEMENT if($xml&&substr($ELEMENT,-1,1) eq '/');
-
- }
-
-
-
- if(substr($$dataref,$c,1) ne '>'){
-
- # eat whitespace
-
- $c++ while(substr($$dataref,$c,1)=~tr/ \t\r\n//);
-
-
-
- if(substr($$dataref,$c,1) eq '='){
-
- $c++;
-
- $start=$c;
-
- my $p = substr($$dataref,$c,1);
-
- if($p eq '"' || $p eq '\''){
-
- $c++; $start++;
-
- $c++ while(substr($$dataref,$c,1) ne $p &&
-
- $c < $LEN);
-
- $VALUE=substr($$dataref,$start,$c-$start);
-
- $c++;
-
- } else {
-
- $c++ while(substr($$dataref,$c,1)!~tr/ \t\r\n\>// &&
-
- $c < $LEN);
-
- $VALUE=substr($$dataref,$start,$c-$start);
-
- chop $VALUE if($xml&&substr($$dataref,$c-1,2) eq '/>');
-
- }
-
-
-
- # eat whitespace
-
- $c++ while(substr($$dataref,$c,1)=~tr/ \t\r\n//);
-
- }
-
- } # if $c ne '>'
-
- $c--;
-
- $TAG{$ELEMENT}=$VALUE if($ELEMENT ne '' && ($xml && $ELEMENT ne '/'));
-
- }
-
- }}
-
-
-
- ################################################################
-
-
-
- =pod
-
-
-
- =head1 ++ Sub package: http
-
-
-
- The http package is the core package of libwhisker. It is responsible
-
- for making the HTTP requests, and parsing the responses. It can handle
-
- HTTP 0.9, 1.0, and 1.1 requests, and allows pretty much every aspect of
-
- the request to be configured and controlled. The HTTP functions use a
-
- HTTP in/out hash, which is a normal perl hash. For outgoing HTTP requests
-
- ('hin' hashes), the keys/values represent outgoing HTTP headers. For HTTP
-
- responses ('hout' hashes), the keys/values represent incoming HTTP
-
- headers. For both, however, there is a special key, 'whisker', whose
-
- value is a hash reference. The whisker control hash contains more
-
- configuration variables, which include host, port, and uri of the desired
-
- request. To access the whisker control hash, use the following
-
- notation: $hash{'whisker'}->{'key'}='value';
-
-
-
- You should view LW::http_init_request() for a list of core whisker control
-
- hash values.
-
-
-
- The internals of the http subpackage will be rewritten shortly--the
-
- current implementation is gross and not very good style. Note that the
-
- API will be unaffected; it will only be an internal reordering. All
-
- references/uses of $$Z will be cleaned up to be more practical/eliminated.
-
-
-
- =cut
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::http_init_request
-
-
-
- Params: \%request_hash_to_initialize
-
- Return: Nothing (modifies input hash)
-
-
-
- Sets default values to the input hash for use. Sets the host to
-
- 'localhost', port 80, request URI '/', using HTTP 1.1 with GET
-
- method. The timeout is set to 10 seconds, no proxies are defined, and all
-
- URI formatting is set to standard HTTP syntax. It also sets the
-
- Connection (Keep-Alive) and User-Agent headers.
-
-
-
- NOTICE!! It's important to use http_init_request before calling
-
- http_do_request, or http_do_request might puke. Thus, a special magic
-
- value is placed in the hash to let http_do_request know that the hash has
-
- been properly initialized. If you really must 'roll your own' and not use
-
- http_init_request before you call http_do_request, you will at least need
-
- to set the INITIAL_MAGIC value (amongst other things).
-
-
-
- =cut
-
-
-
- sub http_init_request { # doesn't return anything
-
- my ($hin)=shift;
-
-
-
- return if(!(defined $hin && ref($hin)));
-
- %$hin=(); # clear control hash
-
-
-
- # control values
-
- $$hin{'whisker'}={
-
- req_spacer => ' ',
-
- req_spacer2 => ' ',
-
- http_ver => '1.1',
-
- method => 'GET',
-
- method_postfix => '',
-
- port => 80,
-
- uri => '/',
-
- uri_prefix => '',
-
- uri_postfix => '',
-
- uri_param_sep => '?',
-
- host => 'localhost',
-
- http_req_trailer => '',
-
- timeout => 10,
-
- include_host_in_uri => 0,
-
- ignore_duplicate_headers=> 1,
-
- normalize_incoming_headers => 1,
-
- lowercase_incoming_headers => 0,
-
- ssl => 0,
-
- http_eol => "\x0d\x0a",
-
- force_close => 0,
-
- force_open => 0,
-
- retry => 1,
-
- trailing_slurp => 0,
-
- force_bodysnatch => 0,
-
- INITIAL_MAGIC => 31337
-
- };
-
-
-
-
-
- # default header values
-
- $$hin{'Connection'}='Keep-Alive'; # notice it is now default!
-
- $$hin{'User-Agent'}="libwhisker/$LW::VERSION"; # heh
-
- }
-
-
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::http_do_request
-
-
-
- Params: \%request, \%response [, \%configs]
-
- Return: >=1 if error; 0 if no error (also modifies response hash)
-
-
-
- *THE* core function of libwhisker. LW::http_do_request actually performs
-
- the HTTP request, using the values submitted in %request, and placing result
-
- values in %response. This allows you to resubmit %request in subsequent
-
- requests (%response is automatically cleared upon execution). You can
-
- submit 'runtime' config directives as %configs, which will be spliced into
-
- $hin{'whisker'}->{} before anything else. That means you can do:
-
-
-
- LW::http_do_request(\%req,\%resp,{'uri'=>'/cgi-bin/'});
-
-
-
- This will set $req{'whisker'}->{'uri'}='/cgi-bin/' before execution, and
-
- provides a simple shortcut (note: it does modify %req).
-
-
-
- This function will also retry any requests that bomb out during the
-
- transaction (but not during the connecting phase). This is controlled
-
- by the {whisker}->{retry} value. Also note that the returned error
-
- message in resp is the *last* error received. All retry errors are
-
- put into {whisker}->{retry_errors}, which is an anonymous array.
-
-
-
- Also note that all NTLM auth logic is implemented in http_do_request().
-
- NTLM requires multiple requests in order to work correctly, and so this
-
- function attempts to wrap that and make it all transparent, so that the
-
- final end result is what's passed to the application.
-
-
-
- This function will return 0 on success, 1 on HTTP protocol error, and 2
-
- on non-recoverable network connection error (you can retry error 1, but
-
- error 2 means that the server is totally unreachable and there's no
-
- point in retrying).
-
-
-
- =cut
-
-
-
- sub http_do_request {
-
- my @params = @_;
-
- my $retry_count = ${$params[0]}{'whisker'}->{'retry'} || 0;
-
- my ($ret, @retry_errors, $auth);
-
-
-
- return 1 if(!(defined $params[0] && ref($params[0])));
-
- return 1 if(!(defined $params[1] && ref($params[1])));
-
-
-
- if(defined $params[2]){
-
- foreach (keys %{$params[2]}){
-
- ${$params[0]}{'whisker'}->{$_}=${$params[2]}{$_};}}
-
-
-
- $auth=$params[0]->{'Authorization'} if(defined $params[0]->{'Authorization'});
-
- do {
-
- if(defined $auth && $auth=~/^NTLM/){
-
- $ret=0;
-
- if($params[0]->{'whisker'}->{'ntlm_step'}==0){
-
- $ret=LW::http_do_request_ex($params[0],$params[1]);
-
- return 2 if($ret==2);
-
- if($ret==0){
-
- return 0 if($params[1]->{'whisker'}->{'code'} == 200);
-
- return 1 if($params[1]->{'whisker'}->{'code'} != 401);
-
- $params[0]->{'whisker'}->{'ntlm_step'}=1;
-
- my $thead=utils_find_lowercase_key($params[1],'www-authenticate');
-
- return 1 if(!defined $thead);
-
- return 1 if($thead!~m/^NTLM (.+)$/);
-
- $params[0]->{'Authorization'}='NTLM '.ntlm_client(
-
- $params[0]->{'whisker'}->{'ntlm_obj'},$1);
-
- }
-
- }
-
- if($ret==0){
-
- delete $params[0]->{'Authorization'}
-
- if($params[0]->{'whisker'}->{'ntlm_step'}>1);
-
- $ret=LW::http_do_request_ex($params[0],$params[1]);
-
- $params[0]->{'Authorization'}=$auth;
-
- if($ret>0){ $params[0]->{'whisker'}->{'ntlm_step'}=0;
-
- } else { $params[0]->{'whisker'}->{'ntlm_step'}=2; }
-
- return $ret if($ret==2||$ret==0);
-
- }
-
- } else {
-
- $ret=LW::http_do_request_ex($params[0],$params[1]);
-
- push @{${$params[1]}{'whisker'}->{'retry_errors'}},
-
- @retry_errors if scalar(@retry_errors);
-
- return $ret if($ret==0 || $ret==2);
-
- }
-
- push @retry_errors, ${$params[1]}{'whisker'}->{'error'};
-
- $retry_count--;
-
- } while( $retry_count >= 0);
-
-
-
- # if we get here, we still had errors, but no more retries
-
- return 1;
-
- }
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::http_do_request_ex
-
-
-
- Params: \%req, \%resp, \%configs
-
- Return: >=1 if error; 0 if no error
-
-
-
- NOTE: you should go through http_do_request(), which calls this function.
-
-
-
- This function actually does all the request work. It is called by
-
- http_do_request(), which has a 'retry wrapper' built into it to catch
-
- errors.
-
-
-
- =cut
-
-
-
- sub http_do_request_ex {
-
- my ($hin, $hout, $hashref)=@_;
-
- my ($temp,$vin,$resp,$S,$a,$b,$vout,@c,$c,$res)=(1,'');
-
- my $W; # shorthand alias for the {'whisker'} hash
-
-
-
- return 1 if(!(defined $hin && ref($hin) ));
-
- return 1 if(!(defined $hout && ref($hout)));
-
-
-
- %$hout=(); # clear output hash
-
- $$hout{whisker}->{uri}=$$hin{whisker}->{uri}; # for tracking purposes
-
- $$hout{whisker}->{'INITIAL_MAGIC'}=31338; # we can tell requests from responses
-
-
-
- if($LW::LW_HAS_SOCKET==0){
-
- $$hout{'whisker'}->{'error'}='Socket support not available';
-
- return 2;}
-
-
-
- if(!defined $$hin{'whisker'} ||
-
- !defined $$hin{'whisker'}->{'INITIAL_MAGIC'} ||
-
- $$hin{'whisker'}->{'INITIAL_MAGIC'}!=31337 ){
-
- $$hout{'whisker'}->{'error'}='Input hash not initialized';
-
- return 2;
-
- }
-
-
-
- if(defined $hashref){
-
- foreach (keys %$hashref){
-
- $$hin{'whisker'}->{$_}=$$hashref{$_};}}
-
-
-
- # if we want anti-IDS, make a copy and setup new values
-
- if(defined $$hin{'whisker'}->{'anti_ids'}){
-
- my %copy=%{$hin};
-
- anti_ids(\%copy,$$hin{'whisker'}->{'anti_ids'});
-
- $W = $copy{'whisker'};
-
- } else {
-
- $W = $$hin{'whisker'};
-
- }
-
-
-
- if($$W{'ssl'}>0 && $LW::LW_HAS_SSL!=1){
-
- $$hout{'whisker'}->{'error'}='SSL not available';
-
- return 2;}
-
-
-
- $TIMEOUT=$$W{'timeout'}||10;
-
-
-
- my $cache_key = defined $$W{'proxy_host'} ?
-
- join(':',$$W{'proxy_host'},$$W{'proxy_port'}) :
-
- join(':',$$W{'host'},$$W{'port'});
-
-
-
- if(!defined $http_host_cache{$cache_key}){
-
- # make new entry
-
- push(@{$http_host_cache{$cache_key}},
-
- undef, # SOCKET $$Z[0]
-
- 0, # $SOCKSTATE $$Z[1]
-
- undef, # INET_ATON $$Z[2]
-
- undef, # $SSL_CTX $$Z[3]
-
- undef, # $SSL_THINGY $$Z[4]
-
- '', # $OUTGOING_QUEUE $$Z[5]
-
- '', # $INCOMING_QUEUE $$Z[6]
-
- 0, # $STATS_SYNS $$Z[7]
-
- 0, # $STATS_REQS $$Z[8]
-
- undef ) # SSL session ID $$Z[9]
-
- }
-
-
-
- # NOTE: the 'Z' reference will be going away in future versions...
-
- $Z = $http_host_cache{$cache_key};
-
-
-
- # use $chost/$cport for actual server we are connecting to
-
- my ($chost,$cport,$cwhat,$PROXY)=('',80,'',0);
-
-
-
- if(defined $$W{'proxy_host'}){
-
- $chost=$$W{'proxy_host'};
-
- $cport=$$W{'proxy_port'}||80;
-
- $cwhat='proxy';
-
- $PROXY=1;
-
-
-
- if($$W{'ssl'}>0 && $LW::LW_SSL_LIB==2){
-
- $ENV{HTTPS_PROXY} ="$$W{'proxy_host'}:";
-
- $ENV{HTTPS_PROXY}.=$$W{'proxy_port'}||80; }
-
-
-
- } else {
-
- $chost=$$W{'host'};
-
- $cport=$$W{'port'};
-
- $cwhat='host';
-
- }
-
-
-
- if($$Z[1]>0){ # check to see if socket is still alive
-
- if(! sock_valid($Z,$hin,$hout) ){
-
- $$Z[1]=0;
-
- sock_close($$Z[0],$$Z[4]);
-
- } }
-
- # technically we have a race condition: socket can go
-
- # bad before we send request, below. But that's ok,
-
- # we handle the errors down there.
-
-
-
- if($$Z[1]==0){
-
-
-
- my $SOCK = _newsym();
-
- if(defined $$W{'UDP'} && $$W{'UDP'}>0){
-
- if(!socket($SOCK,PF_INET,SOCK_DGRAM,getprotobyname('udp')||0)){
-
- $$hout{'whisker'}->{'error'}='Socket() problems (UDP)';
-
- return 2;}
-
- } else {
-
- if(!socket($SOCK,PF_INET,SOCK_STREAM,getprotobyname('tcp')||0)){
-
- $$hout{'whisker'}->{'error'}='Socket() problems';
-
- return 2;}
-
- }
-
-
-
- $$Z[0]=$SOCK;
-
-
-
- if(defined $$W{'bind_socket'}){
-
- my $port=$$W{'bind_port'}||14011;
-
- my $addr;
-
- if(defined $$W{'bind_addr'}){
-
- $addr=inet_aton($$W{'bind_addr'});
-
- } else {
-
- $addr=INADDR_ANY;
-
- }
-
- if(!bind($SOCK, sockaddr_in($port,$addr))){
-
- $$hout{'whisker'}->{'error'}='Bind() on socket failed';
-
- return 2;
-
- }
-
- }
-
-
-
- $$Z[5]=$$Z[6]=''; # flush in/out queues
-
-
-
- if($$W{'ssl'}>0){ # ssl setup stuff
-
-
-
- if($LW::LW_SSL_LIB==1){
-
- if(!defined($$Z[3])){
-
- if(! ($$Z[3] = Net::SSLeay::CTX_new()) ){
-
- $$hout{'whisker'}->{'error'}="SSL_CTX error: $!";
-
- return 2;}
-
- if(defined $$W{'ssl_rsacertfile'}){
-
- if(! (Net::SSLeay::CTX_use_RSAPrivateKey_file($$Z[3],
-
- $$W{'ssl_rsacertfile'},
-
- &Net::SSLeay::FILETYPE_PEM))){
-
- $$hout{'whisker'}->{'error'}="SSL_CTX_use_rsacert error: $!";
-
- return 2;}
-
- }
-
- if(defined $$W{'ssl_certfile'}){
-
- if(! (Net::SSLeay::CTX_use_certificate_file($$Z[3],
-
- $$W{'ssl_certfile'},
-
- &Net::SSLeay::FILETYPE_PEM))){
-
- $$hout{'whisker'}->{'error'}="SSL_CTX_use_cert error: $!";
-
- return 2;}
-
- }
-
- }
-
- if(! ($$Z[4] = Net::SSLeay::new($$Z[3])) ){
-
- $$hout{'whisker'}->{'error'}="SSL_new error: $!";
-
- return 2;}
-
- if(defined $$W{'ssl_ciphers'}){
-
- if(!(Net::SSLeay::set_cipher_list($$Z[4],
-
- $$W{'ssl_ciphers'}))){
-
- $$hout{'whisker'}->{'error'}="SSL_set_ciphers error: $!";
-
- return 2;}
-
- }
-
- }
-
- }
-
-
-
- $$Z[2]=inet_aton($chost) if(!defined $$Z[2]);
-
- if(!defined $$Z[2]){ # can't find hostname
-
- $$hout{'whisker'}->{'error'}="Can't resolve hostname";
-
- return 2;
-
- }
-
-
-
- if($$W{'ssl'}>0 && $LW::LW_SSL_LIB==2){
-
- # proxy set in ENV; we always connect to host
-
- $$Z[4]= Net::SSL->new(
-
- PeerAddr => $$hin{'whisker'}->{'host'},
-
- PeerPort => $$hin{'whisker'}->{'port'},
-
- Timeout => $TIMEOUT );
-
- if($@){ $$hout{'whisker'}->{'error'}="Can't connect via SSL: $@[0]";
-
- return 2;}
-
- $$Z[4]->autoflush(1);
-
- } else {
-
- if($LW::LW_NONBLOCK_CONNECT){
-
- my $flags=fcntl($$Z[0],F_GETFL,0);
-
- $flags |= O_NONBLOCK; # set nonblock flag
-
- if(!(fcntl($$Z[0],F_SETFL,$flags))){ # error setting flag
-
- $LW::LW_NONBLOCK_CONNECT=0; # revert to normal
-
- } else {
-
- my $R=connect($$Z[0],sockaddr_in($cport,$$Z[2]));
-
- if(!$R){ # we didn't connect...
-
- if($! != EINPROGRESS){
-
- close($$Z[0]);
-
- $$Z[0]=undef; # this is a bad socket
-
- $$hout{'whisker'}->{'error'}="Can't connect to $cwhat";
-
- return 2;}
-
- vec($vin,fileno($$Z[0]),1)=1;
-
- if(!select(undef,$vin,undef,$TIMEOUT) || !getpeername($$Z[0])){
-
- close($$Z[0]);
-
- $$Z[0]=undef; # this is a bad socket
-
- $$hout{'whisker'}->{'error'}="Can't connect to $cwhat";
-
- return 2;
-
- } }
-
- $flags &= ~O_NONBLOCK; # clear nonblock flag
-
- if(!(fcntl($$Z[0],F_SETFL,$flags))){ # not good!
-
- close($$Z[0]);
-
- $LW::LW_NONBLOCK_CONNECT=0;
-
- $$Z[0]=undef;
-
- $$hout{'whisker'}->{'error'}="Error setting socket to block";
-
- return 2;
-
- } }
-
- }
-
-
-
- if(!defined $$Z[0]){ # this is a safety catch
-
- $$hout{'whisker'}->{'error'}="Error creating valid socket connection";
-
- return 2; }
-
-
-
- if($LW::LW_NONBLOCK_CONNECT==0){ # attempt to do a timeout alarm...
-
- eval {
-
- local $SIG{ALRM} = sub { die "timeout\n" };
-
- eval {alarm($TIMEOUT)};
-
- if(!connect($$Z[0],sockaddr_in($cport,$$Z[2]))){
-
- alarm(0);
-
- die("no_connect\n"); }
-
- eval {alarm(0)};
-
- };
-
- if($@ || !(defined $$Z[0])){
-
- $$hout{'whisker'}->{'error'}="Can't connect to $cwhat";
-
- return 2;
-
- } }
-
-
-
- binmode($$Z[0]); # stupid Windows
-
- # same as IO::Handle->autoflush(1), without importing 1000+ lines
-
- my $S=select($$Z[0]);
-
- $|++; select($S);
-
- }
-
-
-
- $$Z[1]=1; $$Z[7]++;
-
-
-
- if($$W{'ssl'}>0){
-
-
-
- if($LW::LW_SSL_LIB==1){
-
-
-
- if($PROXY){ # handle the proxy CONNECT stuff...
-
- my $SSL_CONNECT = "CONNECT $$W{'host'}".
-
- ":$$W{'port'}/ HTTP/1.0\n\n";
-
- syswrite($$Z[0],$SSL_CONNECT, length($SSL_CONNECT)); }
-
-
-
- Net::SSLeay::set_fd($$Z[4], fileno($$Z[0]));
-
- Net::SSLeay::set_session($$Z[4],$$Z[9]) if(defined $$Z[9]);
-
- if(! (Net::SSLeay::connect($$Z[4])) ){
-
- $$hout{'whisker'}->{'error'}="SSL_connect error: $!";
-
- sock_close($$Z[0],$$Z[4]); return 2;}
-
-
-
- if(defined $$W{'save_ssl_info'} &&
-
- $$W{'save_ssl_info'}>0){
-
- ssl_save_info($hout,$$Z[4]); }
-
- my $x=Net::SSLeay::ctrl($$Z[4],6,0,'');
-
- $$Z[9]=Net::SSLeay::get_session($$Z[4]) unless(defined $$W{'ssl_resume'} &&
-
- $$W{'ssl_resume'}==0);
-
- }
-
-
-
- } else {
-
- $$Z[4]=undef;
-
- }
-
- }
-
-
-
- if(defined $$W{'ids_session_splice'} &&
-
- $$W{'ids_session_splice'}>0 &&
-
- $$W{'ssl'}==0){ # no session_spice over ssl
-
- setsockopt($$Z[0],SOL_SOCKET,SO_SNDLOWAT,1);
-
- @c=split(//, &http_req2line($hin));
-
- # notice we bypass queueing here, in order to trickle the packets
-
- my $ss;
-
- foreach $c (@c){
-
- $ss=syswrite($$Z[0],$c,1); # char size assumed to be 1
-
- if(!defined $ss || $ss==0){
-
- $$hout{'whisker'}->{'error'}="Error sending session splice request to server";
-
- sock_close($$Z[0],$$Z[4]); return 1;
-
- }
-
- select(undef,undef,undef,.1);
-
- }
-
- } else {
-
- http_queue(http_req2line($hin)); }
-
-
-
- $$Z[8]++;
-
-
-
- if($$W{'http_ver'} ne '0.9'){
-
- my %SENT;
-
- if(defined $$W{'header_order'} && ref($$W{'header_order'})){
-
- foreach (@{$$W{'header_order'}}){
-
- next if($_ eq '' || $_ eq 'whisker');
-
- if(ref($$hin{$_})){
-
- $SENT{$_}||=0;
-
- my $v=$$hin{$_}->[$SENT{$_}];
-
- http_queue("$_: $v$$W{'http_eol'}");
-
- } else {
-
- http_queue("$_: $$hin{$_}$$W{'http_eol'}");
-
- }
-
- $SENT{$_}++;
-
- }
-
- }
-
-
-
- foreach (keys %$hin){
-
- next if($_ eq '' || $_ eq 'whisker');
-
- next if(defined $SENT{$_});
-
- if(ref($$hin{$_})){ # header with multiple values
-
- my $key=$_;
-
- foreach (@{$$hin{$key}}){
-
- http_queue("$key: $_$$W{'http_eol'}");}
-
- } else { # normal header
-
- http_queue("$_: $$hin{$_}$$W{'http_eol'}");
-
- }
-
- }
-
-
-
- if(defined $$W{'raw_header_data'}){
-
- http_queue($$W{'raw_header_data'});}
-
-
-
- http_queue($$W{'http_eol'});
-
-
-
- if(defined $$W{'data'}){
-
- http_queue($$W{'data'});}
-
-
-
- } # http 0.9 support
-
-
-
- # take a MD5 of queue, if wanted
-
- if(defined $$W{'queue_md5'}){
-
- $$hout{'whisker'}->{'queue_md5'}= LW::md5($$Z[5]);
-
- }
-
-
-
-
-
- # all data is wrangled...actually send it now
-
- if($res=http_queue_send($$Z[0],$$Z[4])){
-
- $$hout{'whisker'}->{'error'}="Error sending request to server: $res";
-
- sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}
-
-
-
- undef $vin;
-
- if(defined $$Z[4]){
-
- if($LW::LW_SSL_LIB==1){ # Net::SSLeay
-
- shutdown $$Z[0], 1;
-
- vec($vin,fileno($$Z[0]),1)=1;
-
- } else { # Net::SSL
-
- shutdown $$Z[4], 1;
-
- vec($vin,fileno($$Z[4]),1)=1;
-
- }
-
- } else {
-
- vec($vin,fileno($$Z[0]),1)=1;
-
- }
-
-
-
- if(!select($vin,undef,undef,$TIMEOUT)){
-
- $$hout{'whisker'}->{'error'}="Server read timed out";
-
- sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}
-
-
-
- my ($LC,$CL,$TE,$CO)=('',-1,'',''); # extra header stuff
-
-
-
- $$hout{'whisker'}->{'lowercase_incoming_headers'} =
-
- $$W{'lowercase_incoming_headers'};
-
-
-
- if($$W{'http_ver'} ne '0.9'){
-
-
-
- do { # catch '100 Continue' responses
-
- $resp=sock_getline($$Z[0],$$Z[4]);
-
- #$resp=~tr/\r\n//d if(defined $resp);
-
-
-
- if(!defined $resp){
-
- $$hout{'whisker'}->{'error'}='Error reading HTTP response';
-
- if($!){ # this should be left over from sysread via sock_getline
-
- $$hout{'whisker'}->{'error'}.=": $!"; }
-
- $$hout{'whisker'}->{'data'}=$$Z[6];
-
- sock_close($$Z[0],$$Z[4]); $$Z[1]=0; # otherwise bad crap lingers
-
- return 1;}
-
-
-
- if(defined $$W{'save_raw_headers'}){
-
- $$hout{'whisker'}->{'raw_header_data'}.=$resp;}
-
-
-
- if($resp!~/^HTTP\/([0-9.]{3})[ \t]+(\d+)[ \t]{0,1}(.*?)[\r\n]+/){
-
- $$hout{'whisker'}->{'error'}="Invalid HTTP response: $resp";
-
- # let's save the incoming data...we might want it
-
- $$hout{'whisker'}->{'data'}=$resp;
-
- while(defined ($_=sock_getline($$Z[0],$$Z[4]))){
-
- $$hout{'whisker'}->{'data'}.=$_;}
-
- # normally we'd check the results to see if socket is closed, but
-
- # we close it anyway, so it doesn't matter
-
- sock_close($$Z[0],$$Z[4]); $$Z[1]=0; # otherwise bad crap lingers
-
- return 1;}
-
-
-
- $$hout{'whisker'}->{'http_ver'} = $1;
-
- $$hout{'whisker'}->{'http_resp'} = $2;
-
- $$hout{'whisker'}->{'http_resp_message'}= $3;
-
- $$hout{'whisker'}->{'code'} = $2;
-
-
-
- $$hout{'whisker'}->{'100_continue'}++ if($2 == 100);
-
-
-
- while(defined ($_=sock_getline($$Z[0],$$Z[4]))){ # check pertinent headers
-
-
-
- if(defined $$W{'save_raw_headers'}){
-
- $$hout{'whisker'}->{'raw_header_data'}.=$_;}
-
-
-
- $_=~s/[\r]{0,1}\n$//; # anchored regex, so it's fast
-
- last if ($_ eq ''); # acceptable assumption case?
-
-
-
- my $l2=index($_,':'); # this is faster than regex
-
- $a=substr($_,0,$l2);
-
- $b=substr($_,$l2+1);
-
- $b=~s/^([ \t]*)//; # anchored regex, so it's fast
-
-
-
- $hout{'whisker'}->{'abnormal_header_spacing'}++ if($1 ne ' ');
-
-
-
- $LC = lc($a);
-
- next if($LC eq 'whisker');
-
- $TE = lc($b) if($LC eq 'transfer-encoding');
-
- $CL = $b if($LC eq 'content-length');
-
- $CO = lc($b) if($LC eq 'connection');
-
-
-
- if($$W{'lowercase_incoming_headers'}>0){
-
- $a=$LC;
-
- } elsif($$W{'normalize_incoming_headers'}>0){
-
- $a=~s/(-[a-z])/uc($1)/eg;
-
- }
-
-
-
- # save the received header order, in case we're curious
-
- push(@{$$hout{'whisker'}->{'recv_header_order'}},$a);
-
-
-
- if(defined $$hout{$a} && $$W{'ignore_duplicate_headers'}!=1){
-
- if(!ref($$hout{$a})){
-
- my $temp=$$hout{$a};
-
- delete $$hout{$a};
-
- push(@{$$hout{$a}},$temp);
-
- }
-
- push(@{$$hout{$a}},$b);
-
- } else {
-
- $$hout{$a}=$b;
-
- } }
-
-
-
- # did we have a socket error?
-
- if($!){
-
- $hout{'whisker'}->{'error'}='Error in reading response/headers';
-
- sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1; }
-
-
-
- if( $CO eq '' ){ # do whatever the client wanted
-
- $CO = (defined $$hin{'Connection'}) ? lc($$hin{'Connection'}) :
-
- 'close'; }
-
-
-
- } while($$hout{'whisker'}->{'http_resp'}==100);
-
-
-
- } else { # http ver 0.9, we need to fake it
-
- # Keep in mind lame broken servers, like IIS, still send headers for
-
- # 0.9 requests; the headers are treated as data. Also keep in mind
-
- # that if the server doesn't support HTTP 0.9 requests, it will spit
-
- # back an HTTP 1.0 response header. User is responsible for figuring
-
- # this out himself.
-
- $$hout{'whisker'}->{'http_ver'}='0.9';
-
- $$hout{'whisker'}->{'http_resp'}='200';
-
- $$hout{'whisker'}->{'http_resp_message'}='';
-
- }
-
-
-
- if($$W{'force_bodysnatch'} || ( $$W{'method'} ne 'HEAD' &&
-
- $$hout{'whisker'}->{'http_resp'}!=206 &&
-
- $$hout{'whisker'}->{'http_resp'}!=102)){
-
- if ($TE eq 'chunked') {
-
- if(!defined ($a=sock_getline($$Z[0],$$Z[4]))){
-
- $$hout{'whisker'}->{'error'}='Error reading chunked data length';
-
- sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}
-
- $a=~tr/a-fA-F0-9//cd; $CL=hex($a);
-
- $$hout{'whisker'}->{'data'}='';
-
- while($CL!=0) { # chunked sucks
-
- if(!defined ($temp=sock_get($$Z[0],$$Z[4],$CL))){
-
- $$hout{'whisker'}->{'error'}="Error reading chunked data: $!";
-
- sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}
-
- $$hout{'whisker'}->{'data'}=$$hout{'whisker'}->{'data'} . $temp;
-
- $temp=sock_getline($$Z[0], $$Z[4]);
-
- ($temp=sock_getline($$Z[0], $$Z[4])) if(defined $temp &&
-
- $temp=~/^[\r\n]*$/);
-
- if(!defined $temp){ # this will catch errors in either sock_getline
-
- $$hout{'whisker'}->{'error'}="Error reading chunked data: $!";
-
- sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}
-
- $temp=~tr/a-fA-F0-9//cd; $CL=hex($temp);}
-
- # read in trailer headers
-
- while(defined ($_=sock_getline($$Z[0],$$Z[4]))){ tr/\r\n//d; last if($_ eq ''); }
-
- # Hmmmm...error, but we should have full body. Don't return error
-
- if($!){ $$Z[1]=0; sock_close($$Z[0],$$Z[4]); }
-
- } else {
-
- if ($CL != -1) {
-
- if(!defined ($temp=sock_get($$Z[0],$$Z[4],$CL))){
-
- $$hout{'whisker'}->{'error'}="Error reading data: $!";
-
- sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}
-
- } else { # Yuck...read until server stops sending....
-
- $temp=sock_getall($$Z[0],$$Z[4]);
-
- # we go until we puke, so close socket and don't return error
-
- sock_close($$Z[0],$$Z[4]); $$Z[1]=0;
-
- }
-
- $$hout{'whisker'}->{'data'}=$temp;
-
- }
-
- } # /method ne HEAD && http_resp ne 206 or 102/
-
-
-
- if(($CO ne 'keep-alive' || ( defined $$hin{'Connection'} &&
-
- lc($$hin{'Connection'}) eq 'close')) && $$W{'force_open'}!=1){
-
- $$Z[1]=0; sock_close($$Z[0],$$Z[4]);
-
- }
-
-
-
- # this way we know what the state *would* have been...
-
- $$hout{'whisker'}->{'sockstate'}=$$Z[1];
-
- if($$W{'force_close'}>0) {
-
- $$Z[1]=0; sock_close($$Z[0],$$Z[4]); }
-
-
-
- if($$W{'ssl'}>0){ # we don't reuse SSL sockets
-
- $$Z[1]=0; sock_close($$Z[0],$$Z[4]); }
-
-
-
- $$hout{'whisker'}->{'stats_reqs'}=$$Z[8];
-
- $$hout{'whisker'}->{'stats_syns'}=$$Z[7];
-
- $$hout{'whisker'}->{'error'}=''; # no errors
-
- return 0;
-
- }
-
-
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::http_req2line (INTERNAL)
-
-
-
- Params: \%hin, $switch
-
- Return: $request
-
-
-
- req2line is used internally by LW::http_do_request, as well as provides a
-
- convienient way to turn a %hin configuration into an actual HTTP request
-
- line. If $switch is set to 1, then the returned $request will be the URI
-
- only ('/requested/page.html'), versus the entire HTTP request ('GET
-
- /requested/page.html HTTP/1.0\n\n'). Also, if the 'full_request_override'
-
- whisker config variable is set in %hin, then it will be returned instead
-
- of the constructed URI.
-
-
-
- =cut
-
-
-
- sub http_req2line {
-
- my ($S,$hin,$UO)=('',@_);
-
- $UO||=0; # shut up -w warning
-
-
-
- # notice: full_request_override can play havoc with proxy settings
-
- if(defined $$hin{'whisker'}->{'full_request_override'}){
-
- return $$hin{'whisker'}->{'full_request_override'};
-
-
-
- } else { # notice the components of a request--this is for flexibility
-
-
-
- if($UO!=1){$S.= $$hin{'whisker'}->{'method'}.
-
- $$hin{'whisker'}->{'method_postfix'}.
-
- $$hin{'whisker'}->{'req_spacer'};
-
-
-
- if($$hin{'whisker'}->{'include_host_in_uri'}>0){
-
- $S.= 'http://';
-
-
-
- if(defined $$hin{'whisker'}->{'uri_user'}){
-
- $S.= $$hin{'whisker'}->{'uri_user'};
-
- if(defined $$hin{'whisker'}->{'uri_password'}){
-
- $S.= ':'.$$hin{'whisker'}->{'uri_user'};
-
- }
-
- $S.= '@';
-
- }
-
-
-
- $S.= $$hin{'whisker'}->{'host'}.
-
- ':'.$$hin{'whisker'}->{'port'};}}
-
-
-
- $S.= $$hin{'whisker'}->{'uri_prefix'}.
-
- $$hin{'whisker'}->{'uri'}.
-
- $$hin{'whisker'}->{'uri_postfix'};
-
-
-
- if(defined $$hin{'whisker'}->{'uri_param'}){
-
- $S.= $$hin{'whisker'}->{'uri_param_sep'}.
-
- $$hin{'whisker'}->{'uri_param'};}
-
-
-
- if($UO!=1){ if($$hin{'whisker'}->{'http_ver'} ne '0.9'){
-
- $S.= $$hin{'whisker'}->{'req_spacer2'}.'HTTP/'.
-
- $$hin{'whisker'}->{'http_ver'}.
-
- $$hin{'whisker'}->{'http_req_trailer'};}
-
- $S.= $$hin{'whisker'}->{'http_eol'};}}
-
- return $S;}
-
-
-
-
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function LW::sock_close (INTERNAL)
-
-
-
- Params: $socket_file_descriptor, $SSL_THINGY
-
- Return: nothing
-
-
-
- This function will close the indicated socket and SSL connection (if
-
- necessary). They are wrapped in eval()s to make sure if the functions
-
- puke, it doesn't kill the entire program.
-
-
-
- =cut
-
-
-
- sub sock_close {
-
- my ($fd,$ssl)=@_;
-
-
-
- if(defined $ssl){
-
- if($LW::LW_SSL_LIB==1){ # Net::SSLeay
-
- eval "&Net::SSLeay::free($ssl)";
-
- # eval "&Net::SSLeay::CTX_free($$Z[3])";
-
- } else { # Net::SSL
-
- eval { close($ssl) }; # is that right for Net::SSL?
-
- }
-
- }
-
- eval { close($fd); };
-
-
-
- $$Z[4]=undef;
-
- }
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function LW::sock_valid (INTERNAL)
-
-
-
- Params: $Z reference, \%hin, \%hout
-
- Return: 1 if socket valid, 0 if socket disconnected
-
-
-
- This is an internal function used to determine if a socket is
-
- still good (i.e. the other END hasn't closed the connection).
-
- This really only applies to persistent (Keep-Alive) connections.
-
-
-
- This function is not intended for external use.
-
-
-
- =cut
-
-
-
- sub sock_valid {
-
- my ($z,$Hin,$Hout)=@_;
-
-
-
- my $slurp=$$Hin{'whisker'}->{'trailing_slurp'};
-
- my ($o,$vin)=(undef,'');
-
-
-
- return 0 if(defined $$z[3]); # we don't do SSL yet
-
-
-
- # closed socket sets read flag (and so does waiting data)
-
- vec($vin,fileno($$z[0]),1)=1;
-
- if(select(($o=$vin),undef,undef,.01)){ # we have data to read
-
- my ($hold, $res);
-
-
-
- do {
-
- $res = sysread($$z[0], $hold, 4096);
-
- $$z[6].=$hold if($slurp==0); # save to queue
-
- $$Hout{'whisker'}->{'slurped'}.="$hold\0"
-
- if($slurp==1); # save to hout hash
-
- # fall through value of 2 doesn't do anything
-
- } while ($res && select(($o=$vin),undef,undef,.01));
-
-
-
- if(!defined $res || $res==0){ # error or EOF
-
- return 0;
-
- }
-
- }
-
-
-
- return 1;
-
- }
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::sock_getline (INTERNAL)
-
-
-
- Params: $socket_file_descriptor, $SSL_THINGY
-
- Return: $string, undef on error (timeout)
-
-
-
- This function is used internally to read a line of input (up to a '\n')
-
- from the given socket file descriptor (regular or SSL).
-
-
-
- This function is not intended for external use.
-
-
-
- =cut
-
-
-
- sub sock_getline { # read from socket w/ timeouts
-
- my ($fd,$ssl) = @_;
-
- my ($str,$t)=('','');
-
-
-
- $t = index($$Z[6],"\n",0);
-
-
-
- while($t < 0){
-
- return undef if &http_queue_read($fd,$ssl);
-
- $t=index($$Z[6],"\n",0);
-
- }
-
-
-
- # MEMLEAK: use following lines; comment out SPEEDUP and LEGACY lines
-
- # my $r;
-
- # ($r,$$Z[6])=unpack('A'.($t+1).'A*',$$Z[6]);
-
- # return $r;
-
-
-
- # SPEEDUP: use following line; comment out MEMLEAK and LEGACY lines
-
- # return substr($$Z[6],0,$t+1,'');
-
-
-
- # LEGACY: use following lines; comment out MEMLEAK and SPEEDUP lines
-
- my $r = substr($$Z[6],0,$t+1);
-
- substr($$Z[6],0,$t+1)='';
-
- return $r;
-
- }
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::sock_get (INTERNAL)
-
-
-
- Params: $socket_file_descriptor, $SSL_THINGY, required $amount
-
- Return: $string, undef on error
-
-
-
- This function is used internally to read input from the given socket
-
- file descriptor (regular or SSL). Will abort/error if $amount is not
-
- available.
-
-
-
- This function is not intended for external use.
-
-
-
- =cut
-
-
-
- sub sock_get { # read from socket w/ timeouts
-
- my ($fd,$ssl,$amount) = @_;
-
- my ($str,$t)=('','');
-
-
-
- while($amount > length($$Z[6])){
-
- return undef if &http_queue_read($fd,$ssl);
-
- }
-
-
-
- # MEMLEAK: use following lines; comment out SPEEDUP and LEGACY lines
-
- # my $r;
-
- # ($r,$$Z[6])=unpack('A'.$amount.'A*',$$Z[6]);
-
- # return $r;
-
-
-
- # SPEEDUP: use following line; comment out MEMLEAK and LEGACY lines
-
- # return substr($$Z[6],0,$amount,'');
-
-
-
- # LEGACY: use following lines; comment out MEMLEAK and SPEEDUP lines
-
- my $r = substr($$Z[6],0,$amount);
-
- substr($$Z[6],0,$amount)='';
-
- return $r;
-
- }
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::sock_getall (INTERNAL)
-
-
-
- Params: $socket_file_descriptor, $SSL_THINGY
-
- Return: $string
-
-
-
- This function is used internally to read input from the given socket
-
- file descriptor (regular or SSL). It will return everything received
-
- until an error (no data or real error) occurs.
-
-
-
- This function is not intended for external use.
-
-
-
- =cut
-
-
-
- sub sock_getall {
-
- my ($fd,$ssl) = @_;
-
- 1 while( !(&http_queue_read($fd,$ssl)) );
-
- return $$Z[6];
-
- }
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::http_queue_read (INTERNAL)
-
-
-
- Params: $fd, $ssl
-
- Return: $character, undef on error (timeout)
-
-
-
- http_queue_read() will put incoming data from the server into
-
- the incoming queue for reading. If there's no more data (or
-
- on error), it will return 1. Otherwise it returns 0.
-
-
-
- This function is really for internal use only.
-
-
-
- =cut
-
-
-
- sub http_queue_read {
-
- my ($fd,$ssl)=@_;
-
- my ($vin, $t)=('','');
-
-
-
- if(defined $ssl){
-
- if($LW::LW_SSL_LIB==1){ # Net::SSLeay
-
- local $SIG{ALRM} = sub { die "timeout\n" };
-
- local $SIG{PIPE} = sub { die "pipe_error\n" };
-
- eval {
-
- eval { alarm($TIMEOUT); };
-
- $t=Net::SSLeay::read($ssl);
-
- eval { alarm(0); };
-
- };
-
- if($@ || !defined $t || $t eq ''){
-
- return 1;}
-
- $$Z[6].=$t;
-
- } else { # Net::SSL
-
- if(!$ssl->read($t,1024)){ return 1;
-
- } else { $$Z[6].=$t;}
-
- }
-
- } else {
-
- vec($vin,fileno($fd),1)=1; # wait only so long to read...
-
- if(!select($vin,undef,undef,$TIMEOUT)){
-
- return 1;}
-
- if(!sysread($fd,$t,4096)){ return 1; # EOF or error
-
- } else { $$Z[6].=$t;}
-
- }
-
-
-
- return 0;
-
- }
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::http_queue_send (INTERNAL)
-
-
-
- Params: $sock, $ssl
-
- Return: $status_result (undef=ok, else error message)
-
-
-
- This functions sends the current queue (made with LW::http_queue) to the
-
- server via the specified SSL or socket connection.
-
-
-
- =cut
-
-
-
- sub http_queue_send { # write to socket
-
- my ($fd,$ssl)=@_;
-
- my ($v,$wrote,$err)=('');
-
-
-
- my $len = length($$Z[5]);
-
- if(defined $ssl){
-
- if($LW::LW_SSL_LIB==1){ # Net::SSLeay
-
- ($wrote,$err)=Net::SSLeay::ssl_write_all($ssl,$$Z[5]);
-
- return 'Could not send entire data queue' if ($wrote!=$len);
-
- return "SSL_write error: $err" unless $wrote;
-
- } else { # Net::SSL
-
- $ssl->print($$Z[5]);
-
- }
-
- } else {
-
- vec($v,fileno($fd),1)=1;
-
- if(!select(undef,$v,undef,.01)){
-
- return 'Socket write test failed'; }
-
- $wrote=syswrite($fd,$$Z[5],length($$Z[5]));
-
- return "Error sending data queue: $!" if(!defined $wrote);
-
- return 'Could not send entire data queue' if ($wrote != $len);
-
- }
-
- $$Z[5]=''; return undef;
-
- }
-
-
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::http_queue (INTERNAL)
-
-
-
- Params: $data
-
- Return: nothing
-
-
-
- This function will buffer the output to be sent to the server. Output is
-
- buffered for various reasons (particularlly because of SSL, but also
-
- allowing the chance to 'go back' and modify the final output before it's
-
- actually sent (after header constructions, etc).
-
-
-
- =cut
-
-
-
- sub http_queue {
-
- $$Z[5].= shift;
-
- }
-
-
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::http_fixup_request
-
-
-
- Params: $hash_ref
-
- Return: Nothing
-
-
-
- This function takes a %hin hash reference and makes sure the proper
-
- headers exist (for example, it will add the Host: header, calculate the
-
- Content-Length: header for POST requests, etc). For standard requests
-
- (i.e. you want the request to be HTTP RFC-compliant), you should call this
-
- function right before you call LW::http_do_request.
-
-
-
- =cut
-
-
-
- sub http_fixup_request {
-
- my $hin=shift;
-
-
-
- return if(!(defined $hin && ref($hin)));
-
-
-
- if($$hin{'whisker'}->{'http_ver'} eq '1.1'){
-
- $$hin{'Host'}=$$hin{'whisker'}->{'host'} if(!defined $$hin{'Host'});
-
- $$hin{'Connection'}='Keep-Alive' if(!defined $$hin{'Connection'});
-
- }
-
-
-
- if(defined $$hin{'whisker'}->{'data'}){
-
- if(!defined $$hin{'Content-Length'}){
-
- $$hin{'Content-Length'}=length($$hin{'whisker'}->{'data'});}
-
- # if(!defined $$hin{'Content-Encoding'}){
-
- # $$hin{'Content-Encoding'}='application/x-www-form-urlencoded';}
-
- }
-
-
-
- if(defined $$hin{'whisker'}->{'proxy_host'}){
-
- $$hin{'whisker'}->{'include_host_in_uri'}=1;}
-
-
-
- }
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::http_reset
-
-
-
- Params: Nothing
-
- Return: Nothing
-
-
-
- The LW::http_reset function will walk through the %http_host_cache,
-
- closing all open sockets and freeing SSL resources. It also clears
-
- out the host cache in case you need to rerun everything fresh.
-
-
-
- =cut
-
-
-
- sub http_reset {
-
- my $key;
-
-
-
- foreach $key (keys %http_host_cache){
-
- # *Z=$http_host_cache{$key};
-
- sock_close($http_host_cache{$key}->[0],
-
- $http_host_cache{$key}->[4]);
-
- my $x=$http_host_cache{$key}->[3];
-
- if(defined $x && $LW::LW_SSL_LIB==1){
-
- eval "Net::SSLeay::CTX_free($x)"; }
-
- delete $http_host_cache{$key};
-
- }
-
- }
-
-
-
- ##################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::ssl_save_info (INTERNAL)
-
-
-
- Params: \%hout, $ssl_connection
-
- Return: Nothing
-
-
-
- This is an internal function used to save various Net::SSLeay
-
- information into the given hash. Triggered by setting
-
- {'whisker'}->{'save_ssl_info'}=1.
-
-
-
- =cut
-
-
-
- sub ssl_save_info {
-
- my ($hr,$SSL)=@_;
-
- my $cert;
-
-
-
- return if($LW::LW_SSL_LIB!=1); # only Net::SSLeay used
-
- $$hr{'whisker'}->{'ssl_cipher'}=Net::SSLeay::get_cipher($SSL);
-
-
-
- if( $cert = Net::SSLeay::get_peer_certificate($SSL)){
-
- $$hr{'whisker'}->{'ssl_cert_subject'} =
-
- Net::SSLeay::X509_NAME_oneline(
-
- Net::SSLeay::X509_get_subject_name($cert) );
-
-
-
- $$hr{'whisker'}->{'ssl_cert_issuer'} =
-
- Net::SSLeay::X509_NAME_oneline(
-
- Net::SSLeay::X509_get_issuer_name($cert) );
-
- }
-
- }
-
-
-
- ##################################################################
-
-
-
- { $SYMCOUNT = 0;
-
- sub _newsym { # same as Symbol::gensym; taken from libwhisker2
-
- my $pkg="LW::";
-
- my $name = "_STREAM_" . $SYMCOUNT++;
-
- delete $$pkg{$name};
-
- return \*{$pkg.$name};
-
- }}
-
-
-
- ##################################################################
-
- =pod
-
-
-
- =head1 ++ Sub package: mdx
-
-
-
- The mdx subpackage contains support for making MD4 and MD5 hashes of the
-
- given data. It will attempt to use a faster perl module if installed,
-
- and will fall back on the internal perl version (which is *slow* in
-
- comparison) if nothing else was found.
-
-
-
- This was written in a few hours using the explanation of Applied
-
- Cryptography as the main reference, and Digest::Perl::MD5 as a secondary
-
- reference. MD4 was later added, using Authen::NTLM::MD4 as a reference.
-
-
-
- This code should be cross-platform (particularly 64-bit) compatible; if
-
- you get errors, contact rfp@wiretrip.net.
-
-
-
- =cut
-
-
-
- ########################################################################
-
-
-
- { # start md5 packaged varbs
-
- my (@S,@T,@M);
-
- my $code='';
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::md5
-
-
-
- Params: $data
-
- Return: $hex_md5_string
-
-
-
- This function takes a data scalar, and composes a MD5 hash of it, and
-
- returns it in a hex ascii string. It will use the fastest MD5 function
-
- available.
-
-
-
- =cut
-
-
-
- sub md5 {
-
- return undef if(!defined $_[0]); # oops, forgot the data
-
- return MD5->hexhash($_[0]) if(defined $LW::available{'md5'});
-
- return md5_perl($_[0]);
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::md5_perl
-
-
-
- Params: $data
-
- Return: $hex_md5_string
-
-
-
- This is the perl implementation of the MD5 function. You should use
-
- the md5() function, which will call this function as a last resort.
-
- You can call this function directly if you want to test the code.
-
-
-
- =cut
-
-
-
- sub md5_perl {
-
- my $DATA=shift;
-
- $DATA=md5_pad($DATA);
-
- &md5_init() if(!defined $M[0]);
-
- return md5_perl_generated(\$DATA);
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::md5_init (INTERNAL)
-
-
-
- Params: nothing
-
- Return: nothing
-
-
-
- This function generates particular values used in the md5_perl function.
-
- Normally you do not have to call it, as md5_perl will call it if needed.
-
- The values here are special MD5 constants.
-
-
-
- =cut
-
-
-
- sub md5_init {
-
- return if(defined $S[0]);
-
- for(my $i=1; $i<=64; $i++){ $T[$i-1]=int((2**32)*abs(sin($i))); }
-
- my @t=(7,12,17,22,5,9,14,20,4,11,16,23,6,10,15,21);
-
- for($i=0; $i<64; $i++){ $S[$i]=$t[(int($i/16)*4)+($i%4)]; }
-
- @M=( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
-
- 1,6,11,0,5,10,15,4,9,14,3,8,13,2,7,12,
-
- 5,8,11,14,1,4,7,10,13,0,3,6,9,12,15,2,
-
- 0,7,14,5,12,3,10,1,8,15,6,13,4,11,2,9 );
-
- &md5_generate();
-
-
-
- # check to see if it works correctly
-
- my $TEST=md5_pad('foobar');
-
- if( md5_perl_generated(\$TEST) ne
-
- '3858f62230ac3c915f300c664312c63f'){
-
- die('Error: MD5 self-test not successful.');
-
- }
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::md5_pad (INTERNAL)
-
-
-
- Params: $data
-
- Return: $padded_data
-
-
-
- This function pads the data to be compatible with MD5.
-
-
-
- This function is from Digest::Perl::MD5, and bears the following
-
- copyrights:
-
-
-
- Copyright 2000 Christian Lackas, Imperia Software Solutions
-
- Copyright 1998-1999 Gisle Aas.
-
- Copyright 1995-1996 Neil Winton.
-
- Copyright 1991-1992 RSA Data Security, Inc.
-
-
-
- =cut
-
-
-
- sub md5_pad {
-
- my $l = length(my $msg=shift() . chr(128));
-
- $ msg .= "\0" x (($l%64<=56?56:120)-$l%64);
-
- $l=($l-1)*8;
-
- $msg .= pack 'VV',$l & 0xffffffff, ($l >> 16 >> 16);
-
- return $msg;
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::md5_generate (INTERNAL)
-
-
-
- Params: none
-
- Return: none
-
-
-
- This functions generates and compiles the actual MD5 function. It's
-
- faster to have all the operations inline and in order than to call
-
- functions. Generating the code via below function cuts the final
-
- code savings to about 1/50th, with the penalty of having to compile
-
- it the first time it's used (which takes all of a second or two).
-
-
-
- =cut
-
-
-
- sub md5_generate {
-
- my $N='abcddabccdabbcda';
-
- my $M='';
-
- $M='&0xffffffff' if((1 << 16) << 16); # mask for 64bit systems
-
-
-
- $code=<<EOT;
-
- sub md5_perl_generated {
-
- BEGIN { \$^H |= 1; }; # use integer
-
- my (\$A,\$B,\$C,\$D)=(0x67452301,0xefcdab89,0x98badcfe,0x10325476);
-
- my (\$a,\$b,\$c,\$d,\$t,\$i);
-
- my \$dr=shift;
-
- my \$l=length(\$\$dr);
-
- for my \$L (0 .. ((\$l/64)-1) ) {
-
- my \@D = unpack('V16', substr(\$\$dr, \$L*64,64));
-
- (\$a,\$b,\$c,\$d)=(\$A,\$B,\$C,\$D);
-
- EOT
-
-
-
- for($i=0; $i<16; $i++){
-
- my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
-
- $code.="\$t=((\$$d^(\$$b\&(\$$c^\$$d)))+\$$a+\$D[$M[$i]]+$T[$i])$M;\n";
-
- $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1)))+\$$b)$M;\n";
-
- }
-
- for(; $i<32; $i++){
-
- my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
-
- $code.="\$t=((\$$c^(\$$d\&(\$$b^\$$c)))+\$$a+\$D[$M[$i]]+$T[$i])$M;\n";
-
- $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1)))+\$$b)$M;\n";
-
- }
-
- for(; $i<48; $i++){
-
- my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
-
- $code.="\$t=((\$$b^\$$c^\$$d)+\$$a+\$D[$M[$i]]+$T[$i])$M;\n";
-
- $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1)))+\$$b)$M;\n";
-
- }
-
- for(; $i<64; $i++){
-
- my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
-
- $code.="\$t=((\$$c^(\$$b|(~\$$d)))+\$$a+\$D[$M[$i]]+$T[$i])$M;\n";
-
- $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1)))+\$$b)$M;\n";
-
- }
-
-
-
- $code.=<<EOT;
-
- \$A=\$A+\$a\&0xffffffff; \$B=\$B+\$b\&0xffffffff;
-
- \$C=\$C+\$c\&0xffffffff; \$D=\$D+\$d\&0xffffffff;
-
- } # for
-
- return unpack('H*', pack('V4',\$A,\$B,\$C,\$D)); }
-
- EOT
-
- eval "$code";
-
- }
-
-
-
- } # md5 package container
-
-
-
- ########################################################################
-
-
-
- { # start md4 packaged varbs
-
- my (@S,@T,@M);
-
- my $code='';
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::md4
-
-
-
- Params: $data
-
- Return: $hex_md4_string
-
-
-
- This function takes a data scalar, and composes a MD4 hash of it, and
-
- returns it in a hex ascii string. It will use the fastest MD4 function
-
- available.
-
-
-
- =cut
-
-
-
- sub md4 {
-
- return undef if(!defined $_[0]); # oops, forgot the data
-
- md4_perl(@_);
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::md4_perl
-
-
-
- Params: $data
-
- Return: $hex_md4_string
-
-
-
- This is the perl implementation of the MD4 function. You should use
-
- the md4() function, which will call this function as a last resort.
-
- You can call this function directly if you want to test the code.
-
-
-
- =cut
-
-
-
- sub md4_perl {
-
- my $DATA=shift;
-
- $DATA=md5_pad($DATA);
-
- &md4_init() if(!defined $M[0]);
-
- return md4_perl_generated(\$DATA);
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::md4_init (INTERNAL)
-
-
-
- Params: none
-
- Return: none
-
-
-
- This functions generates and compiles the actual MD4 function. It's
-
- faster to have all the operations inline and in order than to call
-
- functions. Generating the code via below function cuts the final
-
- code savings to about 1/50th, with the penalty of having to compile
-
- it the first time it's used (which takes all of a second or two).
-
-
-
- =cut
-
-
-
- sub md4_init {
-
- return if(defined $S[0]);
-
- my @t=(3,7,11,19,3,5,9,13,3,9,11,15);
-
- for($i=0; $i<48; $i++){ $S[$i]=$t[(int($i/16)*4)+($i%4)]; }
-
- @M=( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
-
- 0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,
-
- 0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15 );
-
-
-
- my $N='abcddabccdabbcda';
-
- my $M='';
-
- $M='&0xffffffff' if((1 << 16) << 16); # mask for 64bit systems
-
-
-
- $code=<<EOT;
-
- sub md4_perl_generated {
-
- BEGIN { \$^H |= 1; }; # use integer
-
- my (\$A,\$B,\$C,\$D)=(0x67452301,0xefcdab89,0x98badcfe,0x10325476);
-
- my (\$a,\$b,\$c,\$d,\$t,\$i);
-
- my \$dr=shift;
-
- my \$l=length(\$\$dr);
-
- for my \$L (0 .. ((\$l/64)-1) ) {
-
- my \@D = unpack('V16', substr(\$\$dr, \$L*64,64));
-
- (\$a,\$b,\$c,\$d)=(\$A,\$B,\$C,\$D);
-
- EOT
-
-
-
- for($i=0; $i<16; $i++){
-
- my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
-
- $code.="\$t=((\$$d^(\$$b\&(\$$c^\$$d)))+\$$a+\$D[$M[$i]])$M;\n";
-
- $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1))))$M;\n";
-
- }
-
- for(; $i<32; $i++){
-
- my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
-
- $code.="\$t=(( (\$$b&\$$c)|(\$$b&\$$d)|(\$$c&\$$d) )+\$$a+\$D[$M[$i]]+0x5a827999)$M;\n";
-
- $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1))))$M;\n";
-
- }
-
- for(; $i<48; $i++){
-
- my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
-
- $code.="\$t=(( \$$b^\$$c^\$$d )+\$$a+\$D[$M[$i]]+0x6ed9eba1)$M;\n";
-
- $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1))))$M;\n";
-
- }
-
-
-
- $code.=<<EOT;
-
- \$A=\$A+\$a\&0xffffffff; \$B=\$B+\$b\&0xffffffff;
-
- \$C=\$C+\$c\&0xffffffff; \$D=\$D+\$d\&0xffffffff;
-
- } # for
-
- return unpack('H*', pack('V4',\$A,\$B,\$C,\$D)); }
-
- EOT
-
- eval "$code";
-
-
-
- my $TEST=md5_pad('foobar');
-
- if( md4_perl_generated(\$TEST) ne
-
- '547aefd231dcbaac398625718336f143'){
-
- die('Error: MD4 self-test not successful.');
-
- }
-
- }
-
-
-
- } # md4 package container
-
-
-
-
-
- =pod
-
-
-
- =head1 ++ Sub package: multipart
-
-
-
- The multipart subpackage contains various utility functions which
-
- support making multipart requests (useful for uploading files).
-
-
-
- =cut
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::multipart_set
-
-
-
- Params: \%multi_hash, $param_name, $param_value
-
- Return: nothing
-
-
-
- This function sets the named parameter to the given value within the
-
- supplied multipart hash.
-
-
-
- =cut
-
-
-
- sub multipart_set {
-
- my ($hr,$n,$v)=@_;
-
- return if(!ref($hr)); # error check
-
- return undef if(!defined $n || $n eq '');
-
- $$hr{$n}=$v;
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::multipart_get
-
-
-
- Params: \%multi_hash, $param_name
-
- Return: $param_value, undef on error
-
-
-
- This function retrieves the named parameter to the given value within the
-
- supplied multipart hash. There is a special case where the named
-
- parameter is actually a file--in which case the resulting value will be
-
- "\0FILE". In general, all special values will be prefixed with a NULL
-
- character. In order to get a file's info, use multipart_getfile().
-
-
-
- =cut
-
-
-
- sub multipart_get {
-
- my ($hr,$n)=@_;
-
- return undef if(!ref($hr)); # error check
-
- return undef if(!defined $n || $n eq '');
-
- return $$hr{$n};
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::multipart_setfile
-
-
-
- Params: \%multi_hash, $param_name, $file_path [, $filename]
-
- Return: undef on error, 1 on success
-
-
-
- NOTE: this function does not actually add the contents of $file_path into
-
- the %multi_hash; instead, multipart_write() inserts the content when
-
- generating the final request.
-
-
-
- =cut
-
-
-
- sub multipart_setfile {
-
- my ($hr,$n,$path)=(shift,shift,shift);
-
- my ($fname)=shift;
-
-
-
- return undef if(!ref($hr)); # error check
-
- return undef if(!defined $n || $n eq '');
-
- return undef if(!defined $path);
-
- return undef if(! (-e $path && -f $path) );
-
-
-
- if(!defined $fname){
-
- $path=~m/[\\\/]([^\\\/]+)$/;
-
- $fname=$1||"whisker-file";
-
- }
-
-
-
- $$hr{$n}="\0FILE";
-
- $$hr{"\0$n"}=[$path,$fname];
-
- return 1;
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::multipart_getfile
-
-
-
- Params: \%multi_hash, $file_param_name
-
- Return: $path, $name ($path=undef on error)
-
-
-
- LW::multipart_getfile is used to retrieve information for a file
-
- parameter contained in %multi_hash. To use this you would most
-
- likely do:
-
- ($path,$fname)=LW::multipart_getfile(\%multi, "param_name");
-
-
-
- =cut
-
-
-
- sub multipart_getfile {
-
- my ($hr,$n)=@_;
-
-
-
- return undef if(!ref($hr)); # error check
-
- return undef if(!defined $n || $n eq '');
-
- return undef if(!defined $$hr{$n} || $$hr{$n} ne "\0FILE");
-
-
-
- return @{$$hr{"\0$n"}};
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::multipart_boundary
-
-
-
- Params: \%multi_hash [, $new_boundary_name]
-
- Return: $current_boundary_name
-
-
-
- LW::multipart_boundary is used to retrieve, and optionally set, the
-
- multipart boundary used for the request.
-
-
-
- NOTE: the function does no checking on the supplied boundary, so if
-
- you want things to work make sure it's a legit boundary. Libwhisker
-
- does *not* prefix it with any '---' characters.
-
-
-
- =cut
-
-
-
- sub multipart_boundary {
-
- my ($hr,$new)=@_;
-
- my $ret;
-
-
-
- return undef if(!ref($hr)); # error check
-
-
-
- if(!defined $$hr{"\0BOUNDARY"}){
-
- # create boundary on the fly
-
- my $b = uc(LW::utils_randstr(20));
-
- my $b2 = '-' x 32;
-
- $$hr{"\0BOUNDARY"}="$b2$b";
-
- }
-
-
-
- $ret=$$hr{"\0BOUNDARY"};
-
- if(defined $new){
-
- $$hr{"\0BOUNDARY"}=$new;
-
- }
-
-
-
- return $ret;
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::multipart_write
-
-
-
- Params: \%multi_hash, \%hin_request
-
- Return: 1 if successful, undef on error
-
-
-
- LW::multipart_write is used to parse and construct the multipart data
-
- contained in %multi_hash, and place it ready to go in the given whisker
-
- hash (%hin) structure, to be sent to the server.
-
-
-
- NOTE: file contents are read into the final %hin, so it's possible for
-
- the hash to get *very* large if you have (a) large file(s).
-
-
-
- =cut
-
-
-
- sub multipart_write {
-
- my ($mp,$hr)=@_;
-
-
-
- return undef if(!ref($mp)); # error check
-
- return undef if(!ref($hr)); # error check
-
-
-
- if(!defined $$mp{"\0BOUNDARY"}){
-
- # create boundary on the fly
-
- my $b = uc(LW::utils_randstr(20));
-
- my $b2 = '-' x 32;
-
- $$mp{"\0BOUNDARY"}="$b2$b";
-
- }
-
-
-
- my $B = $$mp{"\0BOUNDARY"};
-
- my $EOL = $$hr{whisker}->{http_eol}||"\x0d\x0a";
-
-
-
- my $keycount=0;
-
- foreach (keys %$mp){
-
- next if(substr($_,0,1) eq "\0");
-
- $keycount++;
-
- if($$mp{$_} eq "\0FILE"){
-
- my ($path,$name)=LW::multipart_getfile($mp,$_);
-
- next if(!defined $path);
-
- $$hr{whisker}->{data}.="$B$EOL";
-
- $$hr{whisker}->{data}.="Content-Disposition: ".
-
- "form-data; name=\"$_\"; ";
-
- $$hr{whisker}->{data}.="filename=\"$name\"$EOL";
-
- $$hr{whisker}->{data}.="Content-Type: ".
-
- "application/octet-stream$EOL";
-
- $$hr{whisker}->{data}.=$EOL;
-
- next if(!open(IN,"<$path"));
-
- binmode(IN); # stupid Windows
-
- while(<IN>){
-
- $$hr{whisker}->{data}.=$_; }
-
- close(IN);
-
- $$hr{whisker}->{data}.=$EOL; # WARNING: is this right?
-
- } else {
-
- $$hr{whisker}->{data}.="$B$EOL";
-
- $$hr{whisker}->{data}.="Content-Disposition: ".
-
- "form-data; name=\"$_\"$EOL";
-
- $$hr{whisker}->{data}.="$EOL$$mp{$_}$EOL";
-
- }
-
- }
-
-
-
- if($keycount){
-
- $$hr{whisker}->{data}.="$B--$EOL"; # closing boundary
-
- $$hr{"Content-Length"}=length($$hr{whisker}->{data});
-
- $$hr{"Content-Type"}="multipart/form-data; boundary=$B";
-
- return 1;
-
- } else {
-
- # multipart hash didn't contain params to upload
-
- return undef;
-
- }
-
- }
-
-
-
- ########################################################################
-
-
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::multipart_read
-
-
-
- Params: \%multi_hash, \%hout_response [, $filepath ]
-
- Return: 1 if successful, undef on error
-
-
-
- LW::multipart_read will parse the data contents of the supplied
-
- %hout_response hash, by passing the appropriate info to
-
- multipart_read_data(). Please see multipart_read_data() for more
-
- info on parameters and behaviour.
-
-
-
- NOTE: this function will return an error if the given %hout_response
-
- Content-Type is not set to "multipart/form-data".
-
-
-
- =cut
-
-
-
- sub multipart_read {
-
- my ($mp, $hr, $fp)=@_;
-
-
-
- return undef if(!(defined $mp && ref($mp)));
-
- return undef if(!(defined $hr && ref($hr)));
-
-
-
- my $ctype = LW::utils_find_lowercase_key($hr,'content-type');
-
- return undef if(!defined $ctype);
-
- return undef if($ctype!~m#^multipart/form-data#i);
-
-
-
- return LW::multipart_read_data($mp,
-
- \${$hr{'whisker'}->{'data'}},undef,$fp);
-
-
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::multipart_read_data
-
-
-
- Params: \%multi_hash, \$data, $boundary [, $filepath ]
-
- Return: 1 if successful, undef on error
-
-
-
- LW::multipart_read_data parses the contents of the supplied data using
-
- the given boundary and puts the values in the supplied %multi_hash.
-
- Embedded files will *not* be saved unless a $filepath is given, which
-
- should be a directory suitable for writing out temporary files.
-
-
-
- NOTE: currently only application/octet-stream is the only supported
-
- file encoding. All other file encodings will not be parsed/saved.
-
-
-
- =cut
-
-
-
- sub multipart_read_data {
-
- my ($mp, $dr, $bound, $fp)=@_;
-
-
-
- return undef if(!(defined $mp && ref($mp)));
-
- return undef if(!(defined $dr && ref($dr)));
-
-
-
- # if $bound is undef, then we'll snag what looks to be
-
- # the first boundry from the data.
-
- if(!defined $bound){
-
- if($$dr=~/([-]{5,}[A-Z0-9]+)[\r\n]/i){
-
- $bound=$1;
-
- } else {
-
- # we didn't spot a typical boundary; error
-
- return undef;
-
- }
-
- }
-
-
-
- if(defined $fp && !(-d $fp && -w $fp)){
-
- $fp=undef; }
-
-
-
- my $line = LW::utils_getline_crlf($dr,0);
-
- return undef if(!defined $line);
-
- return undef if( index($line,$bound) != 0);
-
-
-
- my $done=0;
-
- while(!$done){
-
- $done=multipart_read_data_part($mp, $dr, $bound, $fp);
-
- }
-
-
-
- return 1;
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::multipart_read_data_part (INTERNAL)
-
-
-
- Params: \%multi_hash, \$data, $boundary, $filepath
-
- Return: 0 if more to read, 1 if done
-
-
-
- This is an internal function used by multipart_read_data, and should
-
- not be called on it's own. This is the workhorse, and is quite nasty.
-
-
-
- =cut
-
-
-
- sub multipart_read_data_part {
-
- my ($mp, $dr, $bound, $fp)=@_;
-
-
-
- my $dispinfo = LW::utils_getline_crlf($dr);
-
- return 1 if(!defined $dispinfo);
-
- return 1 if(length($dispinfo)==0);
-
- my $lcdisp = lc($dispinfo);
-
-
-
- if(index($lcdisp,'content-disposition: form-data;') != 0){
-
- return 1; } # bad disposition
-
-
-
- my ($s,$e,$l);
-
-
-
- $s=index($lcdisp,'name="',30);
-
- $e=index($lcdisp, '"', $s+6);
-
- return 1 if($s == -1 || $e == -1);
-
- my $NAME=substr($dispinfo,$s+6,$e-$s-6);
-
-
-
- $s=index($lcdisp,'filename="',$e);
-
- my $FILENAME=undef;
-
- if($s != -1){
-
- $e=index($lcdisp, '"', $s+10);
-
- return 1 if($e == -1); # puke; malformed filename
-
- $FILENAME=substr($dispinfo,$s+10,$e-$s-10);
-
- $s=rindex($FILENAME,'\\');
-
- $e=rindex($FILENAME,'/');
-
- $s=$e if($e>$s);
-
- $FILENAME=substr($FILENAME,$s+1,length($FILENAME)-$s);
-
- }
-
-
-
- my $CTYPE = LW::utils_getline_crlf($dr);
-
-
-
- return 1 if(!defined $CTYPE);
-
- $CTYPE = lc($CTYPE);
-
-
-
- if(length($CTYPE)>0){
-
- $s=index($CTYPE,'content-type:');
-
- return 1 if($s!=0); # bad ctype line
-
- $CTYPE=substr($CTYPE,13,length($CTYPE)-13);
-
- $CTYPE=~tr/ \t//d;
-
- my $xx=LW::utils_getline_crlf($dr);
-
- return 1 if(!defined $xx);
-
- return 1 if(length($xx)>0);
-
- } else {
-
- $CTYPE='application/octet-stream';
-
- }
-
-
-
-
-
- my $VALUE='';
-
- while( defined ($l=LW::utils_getline_crlf($dr)) ){
-
- last if(index($l,$bound)==0);
-
- $VALUE.=$l;
-
- $VALUE.="\r\n";
-
- }
-
-
-
- substr($VALUE,-2,2)='';
-
-
-
- if(!defined $FILENAME){ # read in param
-
- $$mp{$NAME}=$VALUE;
-
- return 0;
-
-
-
- } else { # read in file
-
- $$mp{$NAME}="\0FILE";
-
- return 0 if(!defined $fp);
-
-
-
- # TODO: funky content types, like application/x-macbinary
-
- if($CTYPE ne 'application/octet-stream'){
-
- return 0; }
-
-
-
- my $rfn = lc(LW::utils_randstr(12));
-
- my $fullpath = "$fp$rfn";
-
-
-
- $$mp{"\0$NAME"}=[undef,$FILENAME];
-
- return 0 if(!open(OUT,">$fullpath")); # error opening file
-
- binmode(OUT); # stupid Windows
-
- $$mp{"\0$NAME"}=[$fullpath,$FILENAME];
-
- print OUT $VALUE;
-
- close(OUT);
-
-
-
- return 0;
-
-
-
- } # if !defined $FILENAME
-
-
-
- return 0; # um, this should never be reached...
-
- }
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::multipart_files_list
-
-
-
- Params: \%multi_hash
-
- Return: @files
-
-
-
- LW::multipart_files_list returns an array of parameter names for all
-
- the files that are contained in %multi_hash.
-
-
-
- =cut
-
-
-
- sub multipart_files_list {
-
- my ($mp)=shift;
-
- my @ret;
-
-
-
- return () if(!(defined $mp && ref($mp)));
-
- while( my ($K, $V)=each(%$mp)){
-
- push(@ret,$K) if($V eq "\0FILE"); }
-
- return @ret;
-
- }
-
-
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::multipart_params_list
-
-
-
- Params: \%multi_hash
-
- Return: @params
-
-
-
- LW::multipart_files_list returns an array of parameter names for all
-
- the regular parameters (non-file) that are contained in %multi_hash.
-
-
-
- =cut
-
-
-
- sub multipart_params_list {
-
- my ($mp)=shift;
-
- my @ret;
-
-
-
- return () if(!(defined $mp && ref($mp)));
-
- while( my ($K, $V)=each(%$mp)){
-
- push(@ret,$K) if($V ne "\0FILE" &&
-
- substr($K,0,1) ne "\0" );
-
- }
-
- return @ret;
-
- }
-
-
-
- ########################################################################
-
-
-
-
-
- =pod
-
-
-
-
-
- =head1 ++ Sub package: ntlm
-
-
-
- The ntlm sub package implements ntlm authentication routines.
-
-
-
- =cut
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::ntlm_new
-
-
-
- Params: $username, $password [, $domain, $ntlm_only]
-
- Return: $ntlm_object
-
-
-
- Returns a reference to an array (otherwise known as the 'ntlm object')
-
- which contains the various informations specific to a user/pass combo.
-
- If $ntlm_only is set to 1, then only the NTLM hash (and not the LanMan
-
- hash) will be generated. This results in a speed boost, and is typically
-
- fine for using against IIS servers.
-
-
-
- The array contains the following items, in order:
-
- username, password, domain, lmhash(password), ntlmhash(password)
-
-
-
- =cut
-
-
-
- sub ntlm_new {
-
- my ($user,$pass,$domain,$flag)=@_;
-
- $flag||=0;
-
- return undef if(!defined $user);
-
- $pass||=''; $domain||='';
-
- my @a=("$user","$pass","$domain",undef,undef);
-
- my $t;
-
-
-
- if($flag==0){
-
- $t=substr($pass,0,14);
-
- $t=~tr/a-z/A-Z/;
-
- $t.= "\0"x(14-length($t));
-
- $a[3]=des_E_P16($t); # LanMan password hash
-
- $a[3].= "\0"x(21-length($a[3]));
-
- }
-
-
-
- $t=md4(encode_unicode($pass));
-
- $t=~s/([a-z0-9]{2})/sprintf("%c",hex($1))/ieg;
-
- $t.="\0"x(21-length($t));
-
- $a[4]=$t; # NTLM password hash
-
-
-
- &des_cache_reset(); # reset the keys hash
-
- return \@a;
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::ntlm_generate_responses (INTERNAL)
-
-
-
- Params: $ntlm_object, $challenge_token
-
- Return: $lanman_hash, $ntlm_hash
-
-
-
- Returns the challenge responses to the given tokens, using the password
-
- set in the $ntlm_object.
-
-
-
- =cut
-
-
-
- sub ntlm_generate_responses {
-
- my ($obj,$chal)=@_;
-
- return (undef,undef) if(!defined $obj || !defined $chal);
-
- return (undef,undef) if(!ref($obj));
-
- my $x='';
-
- $x=des_E_P24($obj->[3], $chal) if(defined $obj->[3]);
-
- return ($x, des_E_P24($obj->[4], $chal));
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::ntlm_decode_challenge (INTERNAL)
-
-
-
- Params: $challenge
-
- Return: @challenge_parts
-
-
-
- Splits the supplied challenge into the various parts. The returned array
-
- contains elements in the following order:
-
-
-
- unicode_domain, ident, packet_type, domain_len, domain_maxlen,
-
- domain_offset, flags, challenge_token, reserved, empty, raw_data
-
-
-
- =cut
-
-
-
- sub ntlm_decode_challenge {
-
- return undef if(!defined $_[0]);
-
- my $chal=shift;
-
- my @res;
-
-
-
- @res=unpack('Z8VvvVVa8a8a8',substr($chal,0,48));
-
- push(@res,substr($chal,48));
-
- unshift(@res,substr($chal,$res[4],$res[2]));
-
- return @res;
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::ntlm_header (INTERNAL)
-
-
-
- Params: $string, $header_length, $offset
-
- Return: $header
-
-
-
- Constructs an appropriate header for the supplied $string.
-
-
-
- =cut
-
-
-
- sub ntlm_header {
-
- my ($s,$h,$o)=@_;
-
- my $l=length($s);
-
- return pack('vvV',0,0,$o-$h) if($l==0);
-
- return pack('vvV',$l,$l,$o);
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::ntlm_client
-
-
-
- Params: $ntlm_obj [, $server_challenge]
-
- Return: $response
-
-
-
- ntlm_client() is responsible for generating the base64-encoded text you
-
- include in the HTTP Authorization header. If you call ntlm_client()
-
- without a $server_challenge, the function will return the initial NTLM
-
- request packet (message packet #1). You send this to the server, and
-
- take the server's response (message packet #2) and pass that as
-
- $server_challenge, causing ntlm_client() to generate the final response
-
- packet (message packet #3).
-
-
-
- Note: $server_challenge is expected to be base64 encoded.
-
-
-
- =cut
-
-
-
- sub ntlm_client {
-
- my ($obj,$p)=@_;
-
- my $resp="NTLMSSP\0";
-
-
-
- return undef if(!defined $obj || !ref($obj));
-
-
-
- if(defined $p && $p ne ''){ # answer challenge
-
- $p=~tr/ \t\r\n//d;
-
- $p=LW::decode_base64($p);
-
- my @c=ntlm_decode_challenge($p);
-
- $uu=encode_unicode($obj->[0]); # username
-
- $resp.=pack('V',3);
-
- my($hl,$hn)=ntlm_generate_responses($obj,$c[7]); # token
-
- return undef if(!defined $hl || !defined $hn);
-
- my $o=64;
-
- $resp.=ntlm_header($hl,64,$o); # LM hash
-
- $resp.=ntlm_header($hn,64,($o+=length($hl))); # NTLM hash
-
- $resp.=ntlm_header($c[0],64,($o+=length($hn))); # domain
-
- $resp.=ntlm_header($uu,64,($o+=length($c[0]))); # username
-
- $resp.=ntlm_header($uu,64,($o+=length($uu))); # workstation
-
- $resp.=ntlm_header('',64,($o+=length($uu))); # session
-
- $resp.=pack('V',$c[6]);
-
- $resp.=$hl.$hn.$c[0].$uu.$uu;
-
-
-
- } else { # initiate challenge
-
- $resp.=pack('VV',1,0x0000b207);
-
- $resp.=ntlm_header($obj->[0],32,32);
-
- $resp.=ntlm_header($obj->[2],32,32+length($obj->[0]));
-
- $resp .= $obj->[0].$obj->[2];
-
- }
-
-
-
- return encode_base64($resp,'');
-
- }
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
-
-
- =head1 ++ Sub package: ntlm_des
-
-
-
- The ntlm_des sub package implements unchained forward DES in perl, which
-
- is needed by the ntlm auth package to do it's thing. Note that
-
- unchained forward DES is not a symmetrical cipher--it's much more like
-
- using DES as a digest/hash algorithm. Thus there is very little
-
- practical reuse of this code outside of NTLM authentication.
-
-
-
- The code below has also been 'tweaked' for the reuse of the set of keys,
-
- which is typical when requiring multiple authentication runs. This leads
-
- to a speed increase when multiple authentications are needed.
-
-
-
- The code below is a highly-modified version of Authen::NTLM::DES.pm,
-
- written by Mark.Bush@bushnet.demon.co.uk. Portions of the code below
-
- bear the following copyrights:
-
-
-
- Copyright (C) 2001 Mark Bush. <Mark.Bush@bushnet.demon.co.uk>
-
-
-
- The code is based on fetchmail code which is Copyright (C) 1997 Eric
-
- S. Raymond.
-
-
-
- Fetchmail uses SMB/Netbios code from samba which is Copyright (C)
-
- Andrew Tridgell 1992-1998 with modifications from Jeremy Allison.
-
-
-
- All the des_* functions should be considered internal and not called
-
- directly.
-
-
-
- =cut
-
-
-
- { # start of DES local container #######################################
-
- my $generated=0;
-
- my $perm1 = [57, 49, 41, 33, 25, 17, 9, 1, 58, 50, 42, 34, 26, 18,
-
- 10, 2, 59, 51, 43, 35, 27, 19, 11, 3, 60, 52, 44, 36,
-
- 63, 55, 47, 39, 31, 23, 15, 7, 62, 54, 46, 38, 30, 22,
-
- 14, 6, 61, 53, 45, 37, 29, 21, 13, 5, 28, 20, 12, 4];
-
- my $perm2 = [14, 17, 11, 24, 1, 5, 3, 28, 15, 6, 21, 10,
-
- 23, 19, 12, 4, 26, 8, 16, 7, 27, 20, 13, 2,
-
- 41, 52, 31, 37, 47, 55, 30, 40, 51, 45, 33, 48,
-
- 44, 49, 39, 56, 34, 53, 46, 42, 50, 36, 29, 32];
-
- my $perm3 = [58, 50, 42, 34, 26, 18, 10, 2, 60, 52, 44, 36, 28, 20, 12, 4,
-
- 62, 54, 46, 38, 30, 22, 14, 6, 64, 56, 48, 40, 32, 24, 16, 8,
-
- 57, 49, 41, 33, 25, 17, 9, 1, 59, 51, 43, 35, 27, 19, 11, 3,
-
- 61, 53, 45, 37, 29, 21, 13, 5, 63, 55, 47, 39, 31, 23, 15, 7];
-
- my $perm4 = [32, 1, 2, 3, 4, 5, 4, 5, 6, 7, 8, 9,
-
- 8, 9, 10, 11, 12, 13, 12, 13, 14, 15, 16, 17,
-
- 16, 17, 18, 19, 20, 21, 20, 21, 22, 23, 24, 25,
-
- 24, 25, 26, 27, 28, 29, 28, 29, 30, 31, 32, 1];
-
- my $perm5 = [16, 7, 20, 21, 29, 12, 28, 17, 1, 15, 23, 26, 5, 18, 31, 10,
-
- 2, 8, 24, 14, 32, 27, 3, 9, 19, 13, 30, 6, 22, 11, 4, 25];
-
- my $perm6 = [40, 8, 48, 16, 56, 24, 64, 32, 39, 7, 47, 15, 55, 23, 63, 31,
-
- 38, 6, 46, 14, 54, 22, 62, 30, 37, 5, 45, 13, 53, 21, 61, 29,
-
- 36, 4, 44, 12, 52, 20, 60, 28, 35, 3, 43, 11, 51, 19, 59, 27,
-
- 34, 2, 42, 10, 50, 18, 58, 26, 33, 1, 41, 9, 49, 17, 57, 25];
-
- my $sc = [1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1];
-
-
-
- sub des_E_P16 {
-
- my ($p14) = @_;
-
- my $sp8 = [0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25];
-
- my $p7 = substr($p14, 0, 7);
-
- my $p16 = des_smbhash($sp8, $p7);
-
- $p7 = substr($p14, 7, 7);
-
- $p16 .= des_smbhash($sp8, $p7);
-
- return $p16;
-
- }
-
-
-
- sub des_E_P24 {
-
- my ($p21, $c8_str) = @_;
-
- my @c8 = map {ord($_)} split(//, $c8_str);
-
- my $p24 = des_smbhash(\@c8, substr($p21, 0, 7));
-
- $p24 .= des_smbhash(\@c8, substr($p21, 7, 7));
-
- $p24 .= des_smbhash(\@c8, substr($p21, 14, 7));
-
- }
-
-
-
- sub des_permute {
-
- my ($i,$out, $in, $p, $n) = (0,@_);
-
- foreach $i (0..($n-1)){
-
- $out->[$i] = $in->[$p->[$i]-1]; }
-
- }
-
-
-
- sub des_lshift {
-
- my ($c, $d, $count)=@_;
-
- my (@outc, @outd, $i, $x);
-
- while($count--){
-
- push @$c, shift @$c;
-
- push @$d, shift @$d;
-
- }
-
- }
-
-
-
- my %dohash_cache; # cache for key data; saves some cycles
-
- my %key_cache; # another cache for key data
-
- sub des_cache_reset {
-
- %dohash_cache=();
-
- %key_cache=();
-
- }
-
-
-
- sub des_dohash
-
- {
-
- my ($out, $in, $key) = @_;
-
- my ($i, $j, $k, @pk1, @c, @d, @cd,
-
- @ki, @pd1, @l, @r, @rl);
-
-
-
- # if(!defined $dohash_cache{$skey}){
-
- &des_permute(\@pk1, $key, $perm1, 56);
-
-
-
- for($i=0;$i<28;$i++) {
-
- $c[$i] = $pk1[$i];
-
- $d[$i] = $pk1[$i+28];
-
- }
-
- for($i=0;$i<16;$i++){
-
- my @array;
-
- &des_lshift(\@c,\@d,$sc->[$i]);
-
- @cd = (@c, @d);
-
- &des_permute(\@array, \@cd, $perm2, 48);
-
- $ki[$i] = \@array;
-
- # $dohash_cache{$skey}->[$i]=\@array;
-
- }
-
- # } else {
-
- # for($i=0;$i<16;$i++){
-
- # $ki[$i]=$dohash_cache{$skey}->[$i];}
-
- # }
-
-
-
- des_dohash2($in,\@l,\@r,\@ki);
-
-
-
- @rl = (@r, @l);
-
- &des_permute($out, \@rl, $perm6, 64);
-
- }
-
-
-
- sub des_str_to_key{
-
- my ($str) = @_;
-
- my ($i,@key,$out);
-
- unshift(@str,ord($_))while($_=chop($str));
-
- $key[0] = $str[0]>>1;
-
- $key[1] = (($str[0]&0x01)<<6) | ($str[1]>>2);
-
- $key[2] = (($str[1]&0x03)<<5) | ($str[2]>>3);
-
- $key[3] = (($str[2]&0x07)<<4) | ($str[3]>>4);
-
- $key[4] = (($str[3]&0x0f)<<3) | ($str[4]>>5);
-
- $key[5] = (($str[4]&0x1f)<<2) | ($str[5]>>6);
-
- $key[6] = (($str[5]&0x3f)<<1) | ($str[6]>>7);
-
- $key[7] = $str[6]&0x7f;
-
- foreach $i (0..7) {
-
- $key[$i] = 0xff&($key[$i]<<1); }
-
- @{$key_cache{$str}}=@key;
-
- return \@key;
-
- }
-
-
-
- sub des_smbhash
-
- {
-
- # use faster binary helper
-
- goto &LW::bin::des_smbhash if(defined $LW::available{'lw::bin'});
-
-
-
- my ($in, $key) = @_;
-
- my $key2;
-
-
-
- &des_generate if(!$generated);
-
- if(defined $key_cache{$key}){ $key2=$key_cache{$key};
-
- } else { $key2=&des_str_to_key($key); }
-
-
-
- my ($i, $div, $mod, @in, @outb, @inb, @keyb, @out);
-
- foreach $i (0..63){
-
- $div = int($i/8); $mod = $i%8;
-
- $inb[$i] = ($in->[$div] & (1<<(7-($mod))))? 1: 0;
-
- $keyb[$i] = ($key2->[$div] & (1<<(7-($mod))))? 1: 0;
-
- $outb[$i] = 0;
-
- }
-
- &des_dohash(\@outb, \@inb, \@keyb);
-
- foreach $i (0..7){ $out[$i] = 0; }
-
- foreach $i (0..63){
-
- $out[int($i/8)] |= (1<<(7-($i%8))) if ($outb[$i]); }
-
- my $out = pack("C8", @out);
-
-
-
- return $out;
-
- }
-
-
-
-
-
- sub des_generate { # really scary dragons here....this code is optimized
-
- # for speed, and not readability
-
- my ($i,$j);
-
- my $code=<<EOT;
-
- { my \$sbox = [[
-
- [14,4,13,1,2,15,11,8,3,10,6,12,5,9,0,7],[0,15,7,4,14,2,13,1,10,6,12,11,9,5,3,8],
-
- [4,1,14,8,13,6,2,11,15,12,9,7,3,10,5,0],[15,12,8,2,4,9,1,7,5,11,3,14,10,0,6,13]
-
- ],[
-
- [15,1,8,14,6,11,3,4,9,7,2,13,12,0,5,10],[3,13,4,7,15,2,8,14,12,0,1,10,6,9,11,5],
-
- [0,14,7,11,10,4,13,1,5,8,12,6,9,3,2,15],[13,8,10,1,3,15,4,2,11,6,7,12,0,5,14,9]
-
- ],[
-
- [10,0,9,14,6,3,15,5,1,13,12,7,11,4,2,8],[13,7,0,9,3,4,6,10,2,8,5,14,12,11,15,1],
-
- [13,6,4,9,8,15,3,0,11,1,2,12,5,10,14,7],[1,10,13,0,6,9,8,7,4,15,14,3,11,5,2,12]
-
- ],[
-
- [7,13,14,3,0,6,9,10,1,2,8,5,11,12,4,15],[13,8,11,5,6,15,0,3,4,7,2,12,1,10,14,9],
-
- [10,6,9,0,12,11,7,13,15,1,3,14,5,2,8,4],[3,15,0,6,10,1,13,8,9,4,5,11,12,7,2,14]
-
- ],[
-
- [2,12,4,1,7,10,11,6,8,5,3,15,13,0,14,9],[14,11,2,12,4,7,13,1,5,0,15,10,3,9,8,6],
-
- [4,2,1,11,10,13,7,8,15,9,12,5,6,3,0,14],[11,8,12,7,1,14,2,13,6,15,0,9,10,4,5,3]
-
- ],[
-
- [12,1,10,15,9,2,6,8,0,13,3,4,14,7,5,11],[10,15,4,2,7,12,9,5,6,1,13,14,0,11,3,8],
-
- [9,14,15,5,2,8,12,3,7,0,4,10,1,13,11,6],[4,3,2,12,9,5,15,10,11,14,1,7,6,0,8,13]
-
- ],[
-
- [4,11,2,14,15,0,8,13,3,12,9,7,5,10,6,1],[13,0,11,7,4,9,1,10,14,3,5,12,2,15,8,6],
-
- [1,4,11,13,12,3,7,14,10,15,6,8,0,5,9,2],[6,11,13,8,1,4,10,7,9,5,0,15,14,2,3,12]
-
- ],[
-
- [13,2,8,4,6,15,11,1,10,9,3,14,5,0,12,7],[1,15,13,8,10,3,7,4,12,5,6,11,0,14,9,2],
-
- [7,11,4,1,9,12,14,2,0,6,10,13,15,3,5,8],[2,1,14,7,4,10,8,13,15,12,9,0,3,5,6,11]
-
- ]];
-
- EOT
-
-
-
- $code.='sub des_dohash2 { my ($in,$l,$r,$ki)=@_; my (@p,$i,$j,$k,$m,$n);';
-
- for($i=0;$i<64;$i++){
-
- $code.="\$p[$i] = \$in->[".($perm3->[$i]-1)."];\n"; }
-
- for($i=0;$i<32;$i++){
-
- $code.="\$l->[$i]=\$p[$i]; \$r->[$i]=\$p[".($i+32)."];\n"; }
-
- $code.='for($i=0;$i<16;$i++){ local (@er,@erk,@b,@cb,@pcb,@r2);';
-
- for($i=0;$i<48;$i++){
-
- $code.="\$erk[$i]=\$r->[".($perm4->[$i]-1)."]^(\$ki->[\$i]->[$i]);\n"; }
-
- for($i=0;$i<8;$i++){
-
- for($j=0;$j<6;$j++){
-
- $code.="\$b[$i][$j]=\$erk[".($i*6+$j)."];\n"; }}
-
- for($i=0;$i<8;$i++){
-
- $code.="\$m=(\$b[$i][0]<<1)|\$b[$i][5];";
-
- $code.="\$n=(\$b[$i][1]<<3)|(\$b[$i][2]<<2)|(\$b[$i][3]<<1)|\$b[$i][4];";
-
- for($j=0;$j<4;$j++){
-
- $code.="\$b[$i][$j]=(\$sbox->[$i][\$m][\$n]&".(1<<(3-$j)).")?1:0;"; }}
-
- for($i=0;$i<8;$i++){
-
- for($j=0;$j<4;$j++){
-
- $code.="\$cb[".($i*4+$j)."]=\$b[$i][$j];\n"; }}
-
- for($i=0;$i<32;$i++){
-
- $code.="\$pcb[$i]=\$cb[".($perm5->[$i]-1)."];\n"; }
-
- for($i=0;$i<32;$i++){
-
- $code.="\$r2[$i]=(\$l->[$i])^\$pcb[$i];\n"; }
-
- for($i=0;$i<32;$i++){
-
- $code.="\$l->[$i]=\$r->[$i]; \$r->[$i]=\$r2[$i];\n"; }
-
- $code.='}}}';
-
-
-
- eval "$code";
-
- $generated++;
-
- }
-
-
-
- } ##### end of DES container ################################################
-
-
-
-
-
- =pod
-
-
-
- =head1 ++ Sub package: utils
-
-
-
- The utils subpackage contains various utility functions which serve
-
- different purposes.
-
-
-
- =cut
-
-
-
- ########################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_recperm
-
-
-
- Params: $uri, $depth, \@dir_parts, \@valid, \&func, \%track, \%arrays, \&cfunc
-
- Return: nothing
-
-
-
- This is a special function which is used to recursively-permutate through
-
- a given directory listing. This is really only used by whisker, in order
-
- to traverse down directories, testing them as it goes. See whisker 2.0 for
-
- exact usage examples.
-
-
-
- =cut
-
-
-
- # '/', 0, \@dir.split, \@valid, \&func, \%track, \%arrays, \&cfunc
-
- sub utils_recperm {
-
- my ($p, $pp, $pn, $r, $fr, $dr, $ar, $cr)=(shift,shift,@_);
-
- $p=~s#/+#/#g; if($pp >= @$pn) { push @$r, $p if &$cr($$dr{$p});
-
- } else { my $c=$$pn[$pp];
-
- if($c!~/^\@/){ utils_recperm($p.$c.'/',$pp+1,@_) if(&$fr($p.$c.'/'));
-
- } else { $c=~tr/\@//d; if(defined $$ar{$c}){
-
- foreach $d (@{$$ar{$c}}){
-
- if(&$fr($p.$d.'/')){
-
- utils_recperm($p.$d.'/',$pp+1,@_);}}}}}}
-
-
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_array_shuffle
-
-
-
- Params: @array
-
- Return: nothing
-
-
-
- This function will randomize the order of the elements in the given array.
-
-
-
- =cut
-
-
-
- sub utils_array_shuffle { # fisher yates shuffle....w00p!
-
- my $array=shift; my $i;
-
- for ($i = @$array; --$i;){
-
- my $j = int rand ($i+1);
-
- next if $i==$j;
-
- @$array[$i,$j]=@$array[$j,$i];
-
- }} # end array_shuffle, from Perl Cookbook (rock!)
-
-
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_randstr
-
-
-
- Params: [ $size, $chars ]
-
- Return: $random_string
-
-
-
- This function generates a random string between 10 and 20 characters
-
- long, or of $size if specified. If $chars is specified, then the
-
- random function picks characters from the supplied string. For example,
-
- to have a random string of 10 characters, composed of only the characters
-
- 'abcdef', then you would run:
-
-
-
- LW::utils_randstr(10,'abcdef');
-
-
-
- The default character string is alphanumeric.
-
-
-
- =cut
-
-
-
- sub utils_randstr {
-
- my $str;
-
- my $drift=shift||((rand() * 10) % 10)+10;
-
-
-
- # 'a'..'z' doesn't seem to work on string assignment :(
-
- my $CHARS = shift || 'abcdefghijklmnopqrstuvwxyz' .
-
- 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' .
-
- '0123456789';
-
-
-
- my $L = length($CHARS);
-
- for(1..$drift){
-
- $str .= substr($CHARS,((rand() * $L) % $L),1);
-
- }
-
- return $str;}
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_get_dir
-
-
-
- Params: $uri
-
- Return: $uri_directory
-
-
-
- Will take a URI and return the directory base of it, i.e. /rfp/page.php
-
- will return /rfp/.
-
-
-
- =cut
-
-
-
- sub utils_get_dir {
-
- my ($w,$URL)=(0,shift);
-
-
-
- return undef if(!defined $URL);
-
-
-
- $URL=substr($URL,0,$w) if( ($w=index($URL,'#')) >= 0);
-
- $URL=substr($URL,0,$w) if( ($w=index($URL,'?')) >= 0);
-
-
-
- if( ($w=rindex($URL,'/')) >= 0){
-
- $URL = substr($URL,0,$w+1);
-
- }
-
- return $URL;
-
- }
-
-
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_port_open
-
-
-
- Params: $host, $port
-
- Return: $result
-
-
-
- Quick function to attempt to make a connection to the given host and
-
- port. If a connection was successfully made, function will return true
-
- (1). Otherwise it returns false (0).
-
-
-
- Note: this uses standard TCP connections, thus is not recommended for use
-
- in port-scanning type applications. Extremely slow.
-
-
-
- =cut
-
-
-
- sub utils_port_open { # this should be platform-safe
-
- my ($target,$port)=@_;
-
-
-
- return 0 if(!defined $target || !defined $port);
-
-
-
- if(!(socket(S,PF_INET,SOCK_STREAM,0))){ return 0;}
-
- if(connect(S,sockaddr_in($port,inet_aton($target)))){
-
- close(S); return 1;
-
- } else { return 0;}}
-
-
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_split_uri
-
-
-
- Params: $uri_string [, \%hin_request]
-
- Return: @uri_parts
-
-
-
- Return an array of the following values, in order: uri, protocol, host,
-
- port, params, frag, user, password. Values not defined are given an undef
-
- value. If a %hin_request hash is passed in, then utils_split_uri() will
-
- also set the appropriate values in the hash. While it attempts to do
-
- RFC-compliant URI parsing, it still caters to HTTP[S] only.
-
-
-
- Note: utils_split_uri() will only set the %hin_request if the protocol
-
- is HTTP or HTTPS!
-
-
-
- =cut
-
-
-
- sub utils_split_uri {
-
- my ($uri,$w)=(shift,'',0);
-
- my ($hr)=shift;
-
- my @res=(undef,'http',undef,0,undef,undef,undef,undef);
-
-
-
- return undef if(!defined $uri);
-
-
-
- # remove fragments
-
- ($uri,$res[5])=split('#',$uri,2) if(index($uri,'#',0) >=0);
-
-
-
- # get scheme and net_loc
-
- my $net_loc = undef;
-
- if($uri=~s/^([-+.a-z0-9A-Z]+)://){
-
- $res[1]=lc($1);
-
- if(substr($uri,0,2) eq '//'){
-
- $w=index($uri,'/',2);
-
- if($w >= 0){
-
- $net_loc=substr($uri,2,$w-2);
-
- $uri=substr($uri,$w,length($uri)-$w);
-
- } else {
-
- ($net_loc=$uri)=~tr#/##d;
-
- $uri='/';
-
- }
-
- }
-
- }
-
-
-
-
-
- # parse net_loc info
-
- if(defined $net_loc){
-
- if(index($net_loc,'@',0) >=0){
-
- ($res[6],$net_loc)=split('@',$net_loc,2);
-
- if(index($res[6],':',0) >=0){
-
- ($res[6],$res[7])=split(':',$res[6],2);
-
- }
-
- }
-
- $res[3]=$1 if($net_loc=~s/:([0-9]+)$//);
-
- $res[2]=$net_loc;
-
- }
-
-
-
- # remove query info
-
- ($uri,$res[4])=split('\?',$uri,2) if(index($uri,'?',0) >=0);
-
-
-
- # whatever is left over is the uri
-
- $res[0]=$uri;
-
-
-
- if($res[3]==0 && defined $res[1]){
-
- $res[3]=80 if($res[1] eq 'http');
-
- $res[3]=443 if($res[1] eq 'https');
-
- }
-
-
-
- return @res if($res[3]==0);
-
-
-
- # setup whisker hash
-
- if(defined $hr && ref($hr)){
-
- # these must always exist
-
- $$hr{whisker}->{uri}=$res[0] if(defined $res[0]);
-
- $$hr{whisker}->{ssl}=1 if($res[1] eq 'https');
-
- $$hr{whisker}->{host}=$res[2] if(defined $res[2]);
-
- $$hr{whisker}->{port}=$res[3] ;
-
-
-
- # set/delete parameter attributes
-
- if(defined $res[4]){
-
- $$hr{whisker}->{uri_param}=$res[4];
-
- } else { delete $$hr{whisker}->{uri_param}; }
-
- if(defined $res[6]){
-
- $$hr{whisker}->{uri_user}=$res[6];
-
- } else { delete $$hr{whisker}->{uri_user}; }
-
- if(defined $res[7]){
-
- $$hr{whisker}->{uri_password}=$res[7];
-
- } else { delete $$hr{whisker}->{uri_password}; }
-
- }
-
-
-
- return @res;
-
- }
-
-
-
- #################################################################
-
- =pod
-
-
-
- =head1 - Function: LW::utils_lowercase_headers
-
-
-
- Params: \%hash
-
- Return: nothing
-
-
-
- Will lowercase all the header names (but not values) of the given hash.
-
-
-
- =cut
-
-
-
- sub utils_lowercase_headers {
-
- goto &utils_lowercase_hashkeys;
-
- }
-
-
-
- #################################################################
-
- =pod
-
-
-
- =head1 - Function: LW::utils_lowercase_hashkeys
-
-
-
- Params: \%hash
-
- Return: nothing
-
-
-
- Will lowercase all the header names (but not values) of the given hash.
-
-
-
- =cut
-
-
-
- sub utils_lowercase_hashkeys {
-
- my $href=shift;
-
-
-
- return if(!(defined $href && ref($href)));
-
-
-
- while( my ($key,$val)=each %$href ){
-
- delete $$href{$key};
-
- $$href{lc($key)}=$val;
-
- }
-
- }
-
-
-
- #################################################################
-
- =pod
-
-
-
- =head1 - Function: LW::utils_find_lowercase_key
-
-
-
- Params: \%hash, $key
-
- Return: $value, undef on error or not exist
-
-
-
- Searches the given hash for the $key (regardless of case), and
-
- returns the value.
-
-
-
- =cut
-
-
-
- sub utils_find_lowercase_key {
-
- my ($href,$key)=(shift,lc(shift));
-
-
-
- return undef if(!(defined $href && ref($href)));
-
- return undef if(!defined $key);
-
-
-
- while( my ($k,$v)=each %$href ){
-
- return $v if(lc($k) eq $key);
-
- }
-
- return undef;
-
- }
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_join_uri
-
-
-
- Params: @vals
-
- Return: $url
-
-
-
- Takes the @vals array output from utils_split_uri, and returns a single
-
- scalar/string with them joined again, in the form of:
-
- protocol://user:password@host:port/uri?params#frag
-
-
-
- =cut
-
-
-
- sub utils_join_uri {
-
- my @V=@_;
-
- my $URL;
-
-
-
- $URL.=$V[1].':' if defined $V[1];
-
- if(defined $V[2]){
-
- $URL.='//';
-
- if(defined $V[6]){
-
- $URL.=$V[6];
-
- $URL.=':'.$V[7] if defined $V[7];
-
- $URL.='@';
-
- }
-
- $URL.=$V[2];
-
- }
-
- if($V[3]>0){
-
- my $no = 0;
-
- $no++ if($V[3]==80 && defined $V[1] && $V[1] eq 'http');
-
- $no++ if($V[3]==443 && defined $V[1] && $V[1] eq 'https');
-
- $URL .= ':'.$V[3] if(!$no);
-
- }
-
- $URL.=$V[0];
-
- $URL .= '?'.$V[4] if defined $V[4];
-
- $URL .= '#'.$V[5] if defined $V[5];
-
- return $URL;
-
- }
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_getline
-
-
-
- Params: \$data [, $resetpos ]
-
- Return: $line (undef if no more data)
-
-
-
- Fetches the next \n terminated line from the given data. Use
-
- the optional $resetpos to reset the internal position pointer.
-
- Does *NOT* return trialing \n.
-
-
-
- =cut
-
-
-
- { $POS=0;
-
- sub utils_getline {
-
- my ($dr, $rp)=@_;
-
-
-
- return undef if(!(defined $dr && ref($dr)));
-
- $POS=$rp if(defined $rp);
-
-
-
- my $where=index($$dr,"\n",$POS);
-
- return undef if($where==-1);
-
-
-
- my $str=substr($$dr,$POS,$where-$POS);
-
- $POS=$where+1;
-
-
-
- return $str;
-
- }}
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_getline_crlf
-
-
-
- Params: \$data [, $resetpos ]
-
- Return: $line (undef if no more data)
-
-
-
- Fetches the next \r\n terminated line from the given data. Use
-
- the optional $resetpos to reset the internal position pointer.
-
- Does *NOT* return trialing \r\n.
-
-
-
- =cut
-
-
-
- { $POS=0;
-
- sub utils_getline_crlf {
-
- my ($dr, $rp)=@_;
-
-
-
- return undef if(!(defined $dr && ref($dr)));
-
- $POS=$rp if(defined $rp);
-
-
-
- my $tpos=$POS;
-
- while(1){
-
- my $where=index($$dr,"\n",$tpos);
-
- return undef if($where==-1);
-
-
-
- if(substr($$dr,$where-1,1) eq "\r"){
-
- my $str=substr($$dr,$POS,$where-$POS-1);
-
- $POS=$where+1;
-
- return $str;
-
- } else {
-
- $tpos=$where+1;
-
- }
-
- }
-
- }}
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_absolute_uri
-
-
-
- Params: $uri, $base_uri [, $normalize_flag ]
-
- Return: $absolute_$url
-
-
-
- Double checks that the given $uri is in absolute form (that is,
-
- "http://host/file"), and if not (it's in the form "/file"), then
-
- it will append the given $base_uri to make it absolute. This
-
- provides a compatibility similar to that found in the URI
-
- subpackage.
-
-
-
- If $normalize_flag is set to 1, then the output will be passed
-
- through utils_normalize_uri before being returned.
-
-
-
- =cut
-
-
-
- sub utils_absolute_uri {
-
- my ($uri, $buri, $norm)=@_;
-
- return undef if(!defined $uri || !defined $buri);
-
- return $uri if($uri=~m#^[a-zA-Z]+://#);
-
-
-
- if(substr($uri,0,1) eq '/'){
-
- if($buri=~m#^[a-zA-Z]+://#){
-
- my @p=utils_split_uri($buri);
-
- $buri="$p[1]://$p[2]";
-
- $buri.=":$p[3]" if($p[3]!=80);
-
- $buri.='/';
-
- } else { # ah suck, base URI isn't absolute...
-
- return $uri;
-
- }
-
- } else {
-
- $buri=~s/[?#].*$//; # remove params and frags
-
- $buri.='/' if($buri=~m#^[a-z]+://[^/]+$#i);
-
- $buri=~s#/[^/]*$#/#;
-
- }
-
- return utils_normalize_uri("$buri$uri")
-
- if(defined $norm && $norm > 0);
-
- return $buri.$uri;
-
- }
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_normalize_uri
-
-
-
- Params: $uri [, $fix_windows_slashes ]
-
- Return: $normalized_uri
-
-
-
- Takes the given $uri and does any /./ and /../ dereferencing in
-
- order to come up with the correct absolute URL. If the $fix_
-
- windows_slashes parameter is set to 1, all \ (back slashes) will
-
- be converted to / (forward slashes).
-
-
-
- =cut
-
-
-
- sub utils_normalize_uri {
-
- my ($host,$uri, $win)=('',@_);
-
-
-
- $uri=~tr#\\#/# if(defined $win && $win>0);
-
-
-
- if($uri=~s#^([-+.a-z0-9A-Z]+:)##){
-
- return undef if($1 ne 'http:' && $1 ne 'https:');
-
- $host=$1;
-
- return undef unless($uri=~s#^(//[^/]+)##);
-
- $host.=$1; }
-
- return "$host/" if($uri eq '' || $uri eq '/');
-
-
-
- # fast path check
-
- return "$host$uri" if(index($uri,'/.')<0);
-
-
-
- # parse order/steps as defined in RFC 1808
-
- 1 while($uri=~s#/\./#/# || $uri=~s#//#/#);
-
- $uri=~s#/\.$#/#;
-
- 1 while($uri=~s#[^/]+/\.\./##);
-
- 1 while($uri=~s#^/\.\./#/#);
-
- $uri=~s#[^/]*/\.\.$##;
-
- $uri||='/';
-
- return $host.$uri;
-
- }
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_save_page
-
-
-
- Params: $file, \%response
-
- Return: 0 on success, 1 on error
-
-
-
- Saves the data portion of the given whisker %response hash to the
-
- indicated file. Can technically save the data portion of a
-
- %request hash too. A file is not written if there is no data.
-
-
-
- Note: LW does not do any special file checking; files are opened
-
- in overwrite mode.
-
-
-
- =cut
-
-
-
- sub utils_save_page {
-
- my ($file, $hr)=@_;
-
- return 1 if(!ref($hr) || ref($file));
-
- return 0 if(!defined $$hr{'whisker'} ||
-
- !defined $$hr{'whisker'}->{'data'});
-
- open(OUT,">$file") || return 1;
-
- print OUT $$hr{'whisker'}->{'data'};
-
- close(OUT);
-
- return 0;
-
- }
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_getopts
-
-
-
- Params: $opt_str, \%opt_results
-
- Return: 0 on success, 1 on error
-
-
-
- This function is a general implementation of GetOpts::Std. It will
-
- parse @ARGV, looking for the options specified in $opt_str, and will
-
- put the results in %opt_results. Behavior/parameter values are
-
- similar to GetOpts::Std's getopts().
-
-
-
- Note: this function does *not* support long options (--option),
-
- option grouping (-opq), or options with immediate values (-ovalue).
-
- If an option is indicated as having a value, it will take the next
-
- argument regardless.
-
-
-
- =cut
-
-
-
- sub utils_getopts {
-
- my ($str,$ref)=@_;
-
- my (%O,$l);
-
- my @left;
-
-
-
- return 1 if($str=~tr/-:a-zA-Z0-9//c);
-
-
-
- while($str=~m/([a-z0-9]:{0,1})/ig){
-
- $l=$1;
-
- if($l=~tr/://d){ $O{$l}=1;
-
- } else { $O{$l}=0; }
-
- }
-
-
-
- while($l=shift(@ARGV)){
-
- push(@left,$l)&&next if(substr($l,0,1) ne '-');
-
- push(@left,$l)&&next if($l eq '-');
-
- substr($l,0,1)='';
-
- if(length($l)!=1){
-
- %$ref=();
-
- return 1; }
-
- if($O{$l}==1){
-
- my $x=shift(@ARGV);
-
- $$ref{$l}=$x;
-
- } else { $$ref{$l}=1; }
-
- }
-
-
-
- @ARGV=@left;
-
- return 0;
-
- }
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_unidecode_uri
-
-
-
- Params: $unicode_string
-
- Return: $decoded_string
-
-
-
- This function attempts to decode a unicode (UTF-8) string by
-
- converting it into a single-byte-character string. Overlong
-
- characters are converted to their standard characters in place;
-
- non-overlong (aka multi-byte) characters are substituted with the
-
- 0xff; invalid encoding characters are left as-is.
-
-
-
- Note: this function is useful for dealing with the various unicode
-
- exploits/vulnerabilities found in web servers; it is *not* good for
-
- doing actual UTF-8 parsing, since characters over a single byte are
-
- basically dropped/replaced with a placeholder.
-
-
-
- =cut
-
-
-
- sub utils_unidecode_uri {
-
- my $str = $_[0];
-
- return $str if($str!~tr/!-~//c); # fastpath
-
- my ($lead,$count,$idx);
-
- my $out='';
-
- my $len = length($str);
-
- my ($ptr,$no,$nu)=(0,0,0);
-
-
-
- while($ptr < $len){
-
- my $c=substr($str,$ptr,1);
-
- if( ord($c) >= 0xc0 && ord($c) <= 0xfd){
-
- $count=0;
-
- $c=ord($c)<<1;
-
- while( ($c & 0x80) == 0x80){
-
- $c<<=1;
-
- last if($count++ ==4);
-
- }
-
- $c = ($c & 0xff);
-
- for( $idx=1; $idx<$count; $idx++){
-
- my $o=ord(substr($str,$ptr+$idx,1));
-
- $no=1 if($o != 0x80);
-
- $nu=1 if($o <0x80 || $o >0xbf);
-
- }
-
- my $o=ord(substr($str,$ptr+$idx,1));
-
- $nu=1 if( $o < 0x80 || $o > 0xbf);
-
- if($nu){
-
- $out.=substr($str,$ptr++,1);
-
- } else {
-
- if($no){
-
- $out.="\xff"; # generic replacement char
-
- } else {
-
- my $prior=ord(substr($str,$ptr+$count-1,1))<<6;
-
- $out.= pack("C", (ord(substr($str,$ptr+$count,1) )&0x7f)+$prior);
-
- }
-
- $ptr += $count+1;
-
- }
-
- $no=$nu=0;
-
- } else {
-
- $out.=$c;
-
- $ptr++;
-
- }
-
- }
-
- return $out;
-
- }
-
-
-
- #################################################################
-
-
-
- =pod
-
-
-
- =head1 - Function: LW::utils_text_wrapper
-
-
-
- Params: $long_text_string [, $crlf, $width ]
-
- Return: $formatted_test_string
-
-
-
- This is a simple function used to format a long line of text for
-
- display on a typical limited-character screen, such as a unix
-
- shell console.
-
-
-
- $crlf defaults to "\n", and $width defaults to 76.
-
-
-
- =cut
-
-
-
- sub utils_text_wrapper {
-
- my ($out,$w,$str,$crlf,$width)=('',0,@_);
-
- $crlf||="\n"; $width||=76;
-
- $str.=$crlf if($str!~/$crlf$/);
-
- return $str if(length($str)<=$width);
-
- while(length($str)>$width){
-
- my $w1=rindex($str,' ',$width);
-
- my $w2=rindex($str,"\t",$width);
-
- if($w1>$w2){ $w=$w1; } else { $w=$w2; }
-
- if($w==-1){ $w=$width;
-
- } else { substr($str,$w,1)=''; }
-
- $out.=substr($str,0,$w,'');
-
- $out.=$crlf;
-
- }
-
- return $out.$str;
-
- }
-
-
-
- #################################################################
-
-
-
- 1;
-